@book{abu-mostafaLearningData2012a,
title = {Learning {{From Data}}},
author = {Abu-Mostafa, Yaser S. and Magdon-Ismail, Malik and Lin, Hsuan-Tien},
date = {2012-01-01},
publisher = {{AMLBook}},
location = {{S.l.}},
isbn = {978-1-60049-006-4},
langid = {english},
pagetotal = {213}
}
@inproceedings{akibaOptunaNextgenerationHyperparameter2019,
title = {Optuna: {{A Next-generation Hyperparameter Optimization Framework}}},
shorttitle = {Optuna},
booktitle = {Proceedings of the 25th {{ACM SIGKDD International Conference}} on {{Knowledge Discovery}} \& {{Data Mining}}},
author = {Akiba, Takuya and Sano, Shotaro and Yanase, Toshihiko and Ohta, Takeru and Koyama, Masanori},
date = {2019-07-25},
series = {{{KDD}} '19},
pages = {2623--2631},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/3292500.3330701},
url = {https://doi.org/10.1145/3292500.3330701},
urldate = {2022-11-16},
abstract = {The purpose of this study is to introduce new design-criteria for next-generation hyperparameter optimization software. The criteria we propose include (1) define-by-run API that allows users to construct the parameter search space dynamically, (2) efficient implementation of both searching and pruning strategies, and (3) easy-to-setup, versatile architecture that can be deployed for various purposes, ranging from scalable distributed computing to light-weight experiment conducted via interactive interface. In order to prove our point, we will introduce Optuna, an optimization software which is a culmination of our effort in the development of a next generation optimization software. As an optimization software designed with define-by-run principle, Optuna is particularly the first of its kind. We will present the design-techniques that became necessary in the development of the software that meets the above criteria, and demonstrate the power of our new design through experimental results and real world applications. Our software is available under the MIT license (https://github.com/pfnet/optuna/).},
isbn = {978-1-4503-6201-6},
keywords = {Bayesian optimization,black-box optimization,hyperparameter optimization,machine learning system},
file = {/Users/michelsen/Zotero/storage/94LWRTM9/akiba2019.pdf.pdf;/Users/michelsen/Zotero/storage/Q5BCLKT4/Akiba et al. - 2019 - Optuna A Next-generation Hyperparameter Optimizat.pdf}
}
@article{al-asadiInferenceVisualizationDNA,
title = {Inference and Visualization of {{DNA}} Damage Patterns Using a Grade of Membership Model},
author = {Al-Asadi, Hussein and Dey, Kushal K and Novembre, John and Stephens, Matthew},
pages = {7},
abstract = {Motivation: Quality control plays a major role in the analysis of ancient DNA (aDNA). One key step in this quality control is assessment of DNA damage: aDNA contains unique signatures of DNA damage that distinguish it from modern DNA, and so analyses of damage patterns can help confirm that DNA sequences obtained are from endogenous aDNA rather than from modern contamination. Predominant signatures of DNA damage include a high frequency of cytosine to thymine substitutions (C-to-T) at the ends of fragments, and elevated rates of purines (A \& G) before the 5' strand-breaks. Existing QC procedures help assess damage by simply plotting for each sample, the C-to-T mismatch rate along the read and the composition of bases before the 5' strand-breaks. Here we present a more flexible and comprehensive model-based approach to infer and visualize damage patterns in aDNA, implemented in an R package aRchaic. This approach is based on a ‘grade of membership’ model (also known as ‘admixture’ or ‘topic’ model) in which each sample has an estimated grade of membership in each of K damage profiles that are estimated from the data.},
langid = {english},
keywords = {adna,ancient dna,damage,damage pattern,mismatch},
file = {/Users/michelsen/Zotero/storage/UXRKCH8R/Al-Asadi et al. - Inference and visualization of DNA damage patterns.pdf}
}
@thesis{al-nakeebMachineLearningTools2017,
type = {phdthesis},
title = {Machine {{Learning Tools}} for {{DNA Sequence Analysis}}},
author = {Al-Nakeeb, Kosai},
date = {2017-12},
institution = {{Technical University of Denmark}},
location = {{Department of Bio and Health Informatics}}
}
@misc{anaemiasNutritionalAnaemiasReport1968,
title = {Nutritional Anaemias: Report of a {{WHO}} Scientific Group [Meeting Held in {{Geneva}} from 13 to 17 {{March}} 1967]},
author = {{WHO Scientific Group on Nutritional Anaemias} and {World Health Organization}},
date = {1968},
series = {World {{Health Organization}} Technical Report Series ; No. 405},
note = {Russian version of nos. 404-438 bound together (barcode no. 00073779)},
publisher = {{World Health Organization}}
}
@article{andersonTrackingCellSurface1992,
title = {Tracking of Cell Surface Receptors by Fluorescence Digital Imaging Microscopy Using a Charge-Coupled Device Camera. {{Low-density}} Lipoprotein and Influenza Virus Receptor Mobility at 4 Degrees {{C}}},
author = {Anderson, C.M. and Georgiou, G.N. and Morrison, I.E. and Stevenson, G.V. and Cherry, R.J.},
date = {1992-02-01},
journaltitle = {Journal of Cell Science},
shortjournal = {Journal of Cell Science},
volume = {101},
number = {2},
pages = {415--425},
issn = {0021-9533},
doi = {10.1242/jcs.101.2.415},
url = {https://doi.org/10.1242/jcs.101.2.415},
urldate = {2022-11-18},
abstract = {A fluorescence imaging system, based on using a cooled slow-scan CCD camera, has been developed for tracking receptors on the surfaces of living cells. The technique is applicable to receptors for particles such as lipoproteins and viruses that can be labeled with a few tens of fluorophores. The positions of single particles in each image are determined to within 25 nm by fitting the fluorescence distribution to a two-dimensional Gaussian function. This procedure also provides an accurate measure of intensity, which is used as a tag for automated tracking of particles from frame to frame. The method is applied to an investigation of the mobility of receptors for LDL and influenza virus particles on human dermal fibroblasts at 4 degrees C. In contrast to previous studies by FRAP (fluorescence recovery after photo-bleaching), it is found that receptors have a low but measurable mobility at 4 degrees C. Analysis of individual particle tracks indicates that whilst some receptors undergo random diffusion, others undergo directed motion (flow) or diffusion restricted to a domain. A procedure is proposed for subdividing receptors according to their different types of motion and hence determining their motional parameters. The finding that receptors are not completely immobilised at 4 degrees C is significant for studies of receptor distributions performed at this temperature.},
file = {/Users/michelsen/Zotero/storage/XGPKUI2R/Anderson et al. - 1992 - Tracking of cell surface receptors by fluorescence.pdf;/Users/michelsen/Zotero/storage/SM8HT9EX/Tracking-of-cell-surface-receptors-by-fluorescence.html}
}
@article{bagerRiskHospitalisationAssociated2021,
title = {Risk of Hospitalisation Associated with Infection with {{SARS-CoV-2}} Lineage {{B}}.1.1.7 in {{Denmark}}: An Observational Cohort Study},
shorttitle = {Risk of Hospitalisation Associated with Infection with {{SARS-CoV-2}} Lineage {{B}}.1.1.7 in {{Denmark}}},
author = {Bager, Peter and Wohlfahrt, Jan and Fonager, Jannik and Rasmussen, Morten and Albertsen, Mads and Michaelsen, Thomas Yssing and Møller, Camilla Holten and Ethelberg, Steen and Legarth, Rebecca and Button, Mia Sarah Fischer and Gubbels, Sophie and Voldstedlund, Marianne and Mølbak, Kåre and Skov, Robert Leo and Fomsgaard, Anders and Krause, Tyra Grove},
date = {2021-11-01},
journaltitle = {The Lancet Infectious Diseases},
shortjournal = {The Lancet Infectious Diseases},
volume = {21},
number = {11},
pages = {1507--1517},
issn = {1473-3099},
doi = {10.1016/S1473-3099(21)00290-5},
abstract = {Background The more infectious SARS-CoV-2 lineage B.1.1.7 rapidly spread in Europe after December, 2020, and a concern that B.1.1.7 could cause more severe disease has been raised. Taking advantage of Denmark's high RT-PCR testing and whole genome sequencing capacities, we used national health register data to assess the risk of COVID-19 hospitalisation in individuals infected with B.1.1.7 compared with those with other SARS-CoV-2 lineages. Methods We did an observational cohort study of all SARS-CoV-2-positive cases confirmed by RT-PCR in Denmark, sampled between Jan 1 and March 24, 2021, with 14 days of follow-up for COVID-19 hospitalisation. Cases were identified in the national COVID-19 surveillance system database, which includes data from the Danish Microbiology Database (RT-PCR test results), the Danish COVID-19 Genome Consortium, the National Patient Registry, the Civil Registration System, as well as other nationwide registers. Among all cases, COVID-19 hospitalisation was defined as first admission lasting longer than 12 h within 14 days of a sample with a positive RT-PCR result. The study population and main analysis were restricted to the proportion of cases with viral genome data. We calculated the risk ratio (RR) of admission according to infection with B.1.1.7 versus other co-existing lineages with a Poisson regression model with robust SEs, adjusted a priori for sex, age, calendar time, region, and comorbidities. The contribution of each covariate to confounding of the crude RR was evaluated afterwards by a stepwise forward inclusion. Findings Between Jan 1 and March 24, 2021, 50\,958 individuals with a positive SARS-CoV-2 test and at least 14 days of follow-up for hospitalisation were identified; 30\,572 (60·0\%) had genome data, of whom 10\,544 (34·5\%) were infected with B.1.1.7. 1944 (6·4\%) individuals had a COVID-19 hospitalisation and of these, 571 (29·4\%) had a B.1.1.7 infection and 1373 (70·6\%) had an infection with other SARS-CoV-2 lineages. Although the overall number of hospitalisations decreased during the study period, the proportion of individuals infected with B.1.1.7 increased from 3·5\% to 92·1\% per week. B.1.1.7 was associated with a crude RR of hospital admission of 0·79 (95\% CI 0·72–0·87; p{$<$}0·0001) and an adjusted RR of 1·42 (95\% CI 1·25–1·60; p{$<$}0·0001). The adjusted RR was increased in all strata of age and calendar period—the two covariates with the largest contribution to confounding of the crude RR. Interpretation Infection with SARS-CoV-2 lineage B.1.1.7 was associated with an increased risk of hospitalisation compared with that of other lineages in an analysis adjusted for covariates. The overall effect on hospitalisations in Denmark was lessened due to a strict lockdown, but our findings could support hospital preparedness and modelling of the projected impact of the epidemic in countries with uncontrolled spread of B.1.1.7. Funding None.},
langid = {english},
keywords = {corona,covid},
file = {/Users/michelsen/Zotero/storage/5LV28UGM/bager2021.pdf.pdf;/Users/michelsen/Zotero/storage/6XZGIJYZ/Bager et al. - 2021 - Risk of hospitalisation associated with infection .pdf;/Users/michelsen/Zotero/storage/TCG623TX/S1473309921002905.html}
}
@thesis{bakerInferenceDiffusionCoefficients2021,
type = {phdthesis},
title = {Inference of {{Diffusion Coefficients}} from {{Single Particle Trajectories}}},
author = {Baker, Lewis R.},
date = {2021},
institution = {{University of Colorado, Boulder}},
url = {https://scholar.colorado.edu/concern/graduate_thesis_or_dissertations/bc386k398},
abstract = {Systems driven by Brownian motion are ubiquitous. A prevailing challenge is inferring, from data, the parameters that describe these stochastic processes. In this work, we investigate a switch diffusion process that arises in the context of single particle tracking (SPT), wherein the motion of a particle is governed by a discrete set of diffusive states, and the tendency of the particle to switch between these states is modeled as a random process. From such experiments, it is desirable to identify the number of underlying diffusive states, quantitate each state through its diffusion coefficient, and characterize the nature of transitions between different states.We present two models to describe this phenomenon: a mixture model and a hidden Markov model (HMM). For both, we adopt a Bayesian approach to sample the distributions of the underlying parameters, implementing a Markov Chain Monte Carlo (MCMC) scheme to estimate the posterior distributions of the model parameters, and leverage our analysis to investigate the problem of determining the number of diffusive states using model selection criteria. We explore the robustness of the inference scheme by testing it on an ensemble of simulated trajectories.Finally, we present a detailed analysis of a collection of experimentally measured trajectories of phosphoinositide-dependent kinase-1 (PDK-1) on a synthetic membrane with concentrations of protein kinase C (PKC) near the K1/2, obtained using total internal reflection fluorescence (TIRF) microscopy. In this particular experimental setup, PKC-1 is believed to exhibit three distinct modes of diffusion. Our analysis finds a two-state model to be a more parsimonious fit of the data, illustrating a significant challenge to parameter inference. Nevertheless, our analysis permits quantitation of kinetic rate constants which, to our knowledge, have not been measured for this system from single particle tracking experiments.},
langid = {english},
pagetotal = {71},
keywords = {Applied mathematics,Biochemistry,diffusion,hidden markov model,inference,Molecular biology},
file = {/Users/michelsen/Zotero/storage/BASXG3GD/bc386k398.html}
}
@book{barlowStatisticsGuideUse1993,
title = {Statistics: {{A Guide}} to the {{Use}} of {{Statistical Methods}} in the {{Physical Sciences}}},
shorttitle = {Statistics},
author = {Barlow, R. J.},
date = {1993-11-01},
publisher = {{Wiley}},
location = {{Chichester, England ; New York}},
isbn = {978-0-471-92295-7},
langid = {english},
pagetotal = {222}
}
@inproceedings{bergstraAlgorithmsHyperParameterOptimization2011,
title = {Algorithms for {{Hyper-Parameter Optimization}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Bergstra, James and Bardenet, Rémi and Bengio, Yoshua and Kégl, Balázs},
date = {2011},
volume = {24},
publisher = {{Curran Associates, Inc.}},
url = {https://papers.nips.cc/paper/2011/hash/86e8f7ab32cfd12577bc2619bc635690-Abstract.html},
urldate = {2022-11-16},
abstract = {Several recent advances to the state of the art in image classification benchmarks have come from better configurations of existing techniques rather than novel approaches to feature learning. Traditionally, hyper-parameter optimization has been the job of humans because they can be very efficient in regimes where only a few trials are possible. Presently, computer clusters and GPU processors make it possible to run more trials and we show that algorithmic approaches can find better results. We present hyper-parameter optimization results on tasks of training neural networks and deep belief networks (DBNs). We optimize hyper-parameters using random search and two new greedy sequential methods based on the expected improvement criterion. Random search has been shown to be sufficiently efficient for learning neural networks for several datasets, but we show it is unreliable for training DBNs. The sequential algorithms are applied to the most difficult DBN learning problems from [Larochelle et al., 2007] and find significantly better results than the best previously reported. This work contributes novel techniques for making response surface models P (y|x) in which many elements of hyper-parameter assignment (x) are known to be irrelevant given particular values of other elements.},
file = {/Users/michelsen/Zotero/storage/RAC8JZWL/Bergstra et al. - 2011 - Algorithms for Hyper-Parameter Optimization.pdf}
}
@article{bergstraRandomSearchHyperparameter2012a,
title = {Random Search for Hyper-Parameter Optimization},
author = {Bergstra, James and Bengio, Yoshua},
date = {2012},
journaltitle = {Journal of Machine Learning Research},
volume = {13},
number = {10},
pages = {281--305},
url = {http://jmlr.org/papers/v13/bergstra12a.html}
}
@unpublished{betancourtConceptualIntroductionHamiltonian2018,
title = {A {{Conceptual Introduction}} to {{Hamiltonian Monte Carlo}}},
author = {Betancourt, Michael},
date = {2018-07-15},
eprint = {1701.02434},
eprinttype = {arxiv},
primaryclass = {stat},
abstract = {Hamiltonian Monte Carlo has proven a remarkable empirical success, but only recently have we begun to develop a rigorous understanding of why it performs so well on difficult problems and how it is best applied in practice. Unfortunately, that understanding is confined within the mathematics of differential geometry which has limited its dissemination, especially to the applied communities for which it is particularly important. In this review I provide a comprehensive conceptual account of these theoretical foundations, focusing on developing a principled intuition behind the method and its optimal implementations rather than any exhaustive rigor. Whether a practitioner or a statistician, the dedicated reader will acquire a solid grasp of how Hamiltonian Monte Carlo works, when it succeeds, and, perhaps most importantly, when it fails.},
archiveprefix = {arXiv},
keywords = {HMC,Statistics - Methodology},
file = {/Users/michelsen/Zotero/storage/7N3P7B7B/Betancourt - 2018 - A Conceptual Introduction to Hamiltonian Monte Car.pdf;/Users/michelsen/Zotero/storage/FUWHE69E/1701.html}
}
@article{bezansonJuliaFreshApproach2017,
title = {Julia: {{A}} Fresh Approach to Numerical Computing},
author = {Bezanson, Jeff and Edelman, Alan and Karpinski, Stefan and Shah, Viral B},
date = {2017},
journaltitle = {SIAM Review},
volume = {59},
number = {1},
pages = {65--98},
publisher = {{SIAM}},
url = {https://julialang.org/},
keywords = {julia}
}
@article{binghamPyroDeepUniversal2019,
title = {Pyro: {{Deep}} Universal Probabilistic Programming},
author = {Bingham, Eli and Chen, Jonathan P. and Jankowiak, Martin and Obermeyer, Fritz and Pradhan, Neeraj and Karaletsos, Theofanis and Singh, Rohit and Szerlip, Paul A. and Horsfall, Paul and Goodman, Noah D.},
date = {2019},
journaltitle = {Journal of Machine Learning Research},
shortjournal = {J. Mach. Learn. Res.},
volume = {20},
pages = {28:1--28:6},
url = {http://jmlr.org/papers/v20/18-403.html}
}
@article{borryPyDamageAutomatedAncient2021,
title = {{{PyDamage}}: Automated Ancient Damage Identification and Estimation for Contigs in Ancient {{DNA}} de Novo Assembly},
shorttitle = {{{PyDamage}}},
author = {Borry, Maxime and Hübner, Alexander and Rohrlach, Adam B. and Warinner, Christina},
date = {2021-07-27},
journaltitle = {PeerJ},
shortjournal = {PeerJ},
volume = {9},
pages = {e11845},
publisher = {{PeerJ Inc.}},
issn = {2167-8359},
doi = {10.7717/peerj.11845},
abstract = {DNA de novo assembly can be used to reconstruct longer stretches of DNA (contigs), including genes and even genomes, from short DNA sequencing reads. Applying this technique to metagenomic data derived from archaeological remains, such as paleofeces and dental calculus, we can investigate past microbiome functional diversity that may be absent or underrepresented in the modern microbiome gene catalogue. However, compared to modern samples, ancient samples are often burdened with environmental contamination, resulting in metagenomic datasets that represent mixtures of ancient and modern DNA. The ability to rapidly and reliably establish the authenticity and integrity of ancient samples is essential for ancient DNA studies, and the ability to distinguish between ancient and modern sequences is particularly important for ancient microbiome studies. Characteristic patterns of ancient DNA damage, namely DNA fragmentation and cytosine deamination (observed as C-to-T transitions) are typically used to authenticate ancient samples and sequences, but existing tools for inspecting and filtering aDNA damage either compute it at the read level, which leads to high data loss and lower quality when used in combination with de novo assembly, or require manual inspection, which is impractical for ancient assemblies that typically contain tens to hundreds of thousands of contigs. To address these challenges, we designed PyDamage, a robust, automated approach for aDNA damage estimation and authentication of de novo assembled aDNA. PyDamage uses a likelihood ratio based approach to discriminate between truly ancient contigs and contigs originating from modern contamination. We test PyDamage on both simulated aDNA data and archaeological paleofeces, and we demonstrate its ability to reliably and automatically identify contigs bearing DNA damage characteristic of aDNA. Coupled with aDNA de novo assembly, PyDamage opens up new doors to explore functional diversity in ancient metagenomic datasets.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/PG43CP9L/Borry et al. - 2021 - PyDamage automated ancient damage identification .pdf}
}
@software{bradburyJAXComposableTransformations2018,
title = {{{JAX}}: Composable Transformations of {{Python NumPy}} Programs},
author = {Bradbury, James and Frostig, Roy and Hawkins, Peter and Johnson, Matthew James and Leary, Chris and Maclaurin, Dougal and Necula, George and Paszke, Adam and Vander{P}las, Jake and Wanderman-{M}ilne, Skye and Zhang, Qiao},
date = {2018},
url = {http://github.com/google/jax},
version = {0.2.5}
}
@article{briggsPatternsDamageGenomic2007,
title = {Patterns of Damage in Genomic {{DNA}} Sequences from a {{Neandertal}}},
author = {Briggs, Adrian W. and Stenzel, Udo and Johnson, Philip L. F. and Green, Richard E. and Kelso, Janet and Prüfer, Kay and Meyer, Matthias and Krause, Johannes and Ronan, Michael T. and Lachmann, Michael and Pääbo, Svante},
date = {2007-09-11},
journaltitle = {Proceedings of the National Academy of Sciences of the United States of America},
shortjournal = {Proc Natl Acad Sci U S A},
volume = {104},
number = {37},
eprint = {17715061},
eprinttype = {pmid},
pages = {14616--14621},
issn = {0027-8424},
doi = {10.1073/pnas.0704665104},
abstract = {High-throughput direct sequencing techniques have recently opened the possibility to sequence genomes from Pleistocene organisms. Here we analyze DNA sequences determined from a Neandertal, a mammoth, and a cave bear. We show that purines are overrepresented at positions adjacent to the breaks in the ancient DNA, suggesting that depurination has contributed to its degradation. We furthermore show that substitutions resulting from miscoding cytosine residues are vastly overrepresented in the DNA sequences and drastically clustered in the ends of the molecules, whereas other substitutions are rare. We present a model where the observed substitution patterns are used to estimate the rate of deamination of cytosine residues in single- and double-stranded portions of the DNA, the length of single-stranded ends, and the frequency of nicks. The results suggest that reliable genome sequences can be obtained from Pleistocene organisms.},
pmcid = {PMC1976210},
keywords = {adna,ancient dna,damage,damage pattern,mismatch},
file = {/Users/michelsen/Zotero/storage/8RS8X78U/Briggs et al. - 2007 - Patterns of damage in genomic DNA sequences from a.pdf}
}
@misc{brochuTutorialBayesianOptimization2010a,
title = {A {{Tutorial}} on {{Bayesian Optimization}} of {{Expensive Cost Functions}}, with {{Application}} to {{Active User Modeling}} and {{Hierarchical Reinforcement Learning}}},
author = {Brochu, Eric and Cora, Vlad M. and de Freitas, Nando},
options = {useprefix=true},
date = {2010-12-12},
number = {arXiv:1012.2599},
eprint = {1012.2599},
eprinttype = {arxiv},
primaryclass = {cs},
publisher = {{arXiv}},
doi = {10.48550/arXiv.1012.2599},
url = {http://arxiv.org/abs/1012.2599},
urldate = {2022-11-16},
abstract = {We present a tutorial on Bayesian optimization, a method of finding the maximum of expensive cost functions. Bayesian optimization employs the Bayesian technique of setting a prior over the objective function and combining it with evidence to get a posterior function. This permits a utility-based selection of the next observation to make on the objective function, which must take into account both exploration (sampling from areas of high uncertainty) and exploitation (sampling areas likely to offer improvement over the current best observation). We also present two detailed extensions of Bayesian optimization, with experiments---active user modelling with preferences, and hierarchical reinforcement learning---and a discussion of the pros and cons of Bayesian optimization based on our experiences.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,G.1.6,G.3,I.2.6},
file = {/Users/michelsen/Zotero/storage/TFYS4CFH/Brochu et al. - 2010 - A Tutorial on Bayesian Optimization of Expensive C.pdf;/Users/michelsen/Zotero/storage/AIQ2PYTE/1012.html}
}
@article{carpenterStanProbabilisticProgramming2017,
title = {Stan: {{A}} Probabilistic Programming Language},
author = {Carpenter, Bob and Gelman, Andrew and Hoffman, Matthew D and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Marcus and Guo, Jiqiang and Li, Peter and Riddell, Allen},
date = {2017},
journaltitle = {Journal of Statistical Software},
volume = {76},
number = {1},
}
@article{cepeda-cuervoDoubleGeneralizedBetaBinomial2017,
title = {Double {{Generalized Beta-Binomial}} and {{Negative Binomial Regression Models}}},
author = {Cepeda-Cuervo, Edilberto and Cifuentes-Amado, María Victoria},
date = {2017-01},
journaltitle = {Revista Colombiana de Estadística},
volume = {40},
number = {1},
pages = {141--163},
publisher = {{Universidad Nacional de Colombia.}},
issn = {0120-1751},
doi = {10.15446/rce.v40n1.61779},
langid = {english},
file = {/Users/michelsen/Zotero/storage/HRDDWJPK/Cepeda-Cuervo and Cifuentes-Amado - 2017 - Double Generalized Beta-Binomial and Negative Bino.pdf;/Users/michelsen/Zotero/storage/4ZKDB6Y5/scielo.html}
}
@article{dabneyAncientDNADamage2013,
title = {Ancient {{DNA Damage}}},
author = {Dabney, Jesse and Meyer, Matthias and Pääbo, Svante},
date = {2013-07},
journaltitle = {Cold Spring Harbor Perspectives in Biology},
shortjournal = {Cold Spring Harb Perspect Biol},
volume = {5},
number = {7},
eprint = {23729639},
eprinttype = {pmid},
pages = {a012567},
issn = {1943-0264},
doi = {10.1101/cshperspect.a012567},
abstract = {Under favorable conditions DNA can survive for thousands of years in the remains of dead organisms. The DNA extracted from such remains is invariably degraded to a small average size by processes that at least partly involve depurination. It also contains large amounts of deaminated cytosine residues that are accumulated toward the ends of the molecules, as well as several other lesions that are less well characterized., DNA fragments from ancient specimens are short (40–500 bp) and contain lesions that block DNA polymerases and cause replication errors. Degradation involves depurination and cytosine deamination, but other processes may be involved.},
pmcid = {PMC3685887},
keywords = {mikkel},
file = {/Users/michelsen/Zotero/storage/K6IAFPED/Dabney et al. - 2013 - Ancient DNA Damage.pdf;/Users/michelsen/Zotero/storage/ZWXUHXEU/dabney2013.pdf.pdf}
}
@article{dabneyCompleteMitochondrialGenome2013,
title = {Complete Mitochondrial Genome Sequence of a {{Middle Pleistocene}} Cave Bear Reconstructed from Ultrashort {{DNA}} Fragments},
author = {Dabney, Jesse and Knapp, Michael and Glocke, Isabelle and Gansauge, Marie-Theres and Weihmann, Antje and Nickel, Birgit and Valdiosera, Cristina and García, Nuria and Pääbo, Svante and Arsuaga, Juan-Luis and Meyer, Matthias},
date = {2013-09-24},
journaltitle = {Proceedings of the National Academy of Sciences},
volume = {110},
number = {39},
pages = {15758--15763},
publisher = {{Proceedings of the National Academy of Sciences}},
doi = {10.1073/pnas.1314445110},
abstract = {Although an inverse relationship is expected in ancient DNA samples between the number of surviving DNA fragments and their length, ancient DNA sequencing libraries are strikingly deficient in molecules shorter than 40 bp. We find that a loss of short molecules can occur during DNA extraction and present an improved silica-based extraction protocol that enables their efficient retrieval. In combination with single-stranded DNA library preparation, this method enabled us to reconstruct the mitochondrial genome sequence from a Middle Pleistocene cave bear (Ursus deningeri) bone excavated at Sima de los Huesos in the Sierra de Atapuerca, Spain. Phylogenetic reconstructions indicate that the U. deningeri sequence forms an early diverging sister lineage to all Western European Late Pleistocene cave bears. Our results prove that authentic ancient DNA can be preserved for hundreds of thousands of years outside of permafrost. Moreover, the techniques presented enable the retrieval of phylogenetically informative sequences from samples in which virtually all DNA is diminished to fragments shorter than 50 bp.},
file = {/Users/michelsen/Zotero/storage/QIP3ZZVT/dabney2013.pdf.pdf;/Users/michelsen/Zotero/storage/UZ69KHPF/Dabney et al. - 2013 - Complete mitochondrial genome sequence of a Middle.pdf}
}
@article{daleyModelingGenomeCoverage2014,
title = {Modeling Genome Coverage in Single-Cell Sequencing},
author = {Daley, Timothy and Smith, Andrew D.},
date = {2014-11-15},
journaltitle = {Bioinformatics},
shortjournal = {Bioinformatics},
volume = {30},
number = {22},
eprint = {25107873},
eprinttype = {pmid},
pages = {3159--3165},
issn = {1367-4803},
doi = {10.1093/bioinformatics/btu540},
abstract = {Motivation: Single-cell DNA sequencing is necessary for examining genetic variation at the cellular level, which remains hidden in bulk sequencing experiments. But because they begin with such small amounts of starting material, the amount of information that is obtained from single-cell sequencing experiment is highly sensitive to the choice of protocol employed and variability in library preparation. In particular, the fraction of the genome represented in single-cell sequencing libraries exhibits extreme variability due to quantitative biases in amplification and loss of genetic material., Results: We propose a method to predict the genome coverage of a deep sequencing experiment using information from an initial shallow sequencing experiment mapped to a reference genome. The observed coverage statistics are used in a non-parametric empirical Bayes Poisson model to estimate the gain in coverage from deeper sequencing. This approach allows researchers to know statistical features of deep sequencing experiments without actually sequencing deeply, providing a basis for optimizing and comparing single-cell sequencing protocols or screening libraries., Availability and implementation: The method is available as part of the preseq software package. Source code is available at http://smithlabresearch.org/preseq., Contact: [email protected], Supplementary information: Supplementary material is available at Bioinformatics online.},
pmcid = {PMC4221128},
file = {/Users/michelsen/Zotero/storage/6ZK3XXI9/Daley and Smith - 2014 - Modeling genome coverage in single-cell sequencing.pdf}
}
@software{dembinskiScikithepIminuitV22021,
title = {scikit-hep/iminuit: v2.8.2},
shorttitle = {scikit-hep/iminuit},
author = {Dembinski, Hans and Piti Ongmongkolkul and Deil, Christoph and Hurtado, David Menéndez and Schreiner, Henry and Feickert, Matthew and Andrew and Burr, Chris and Watson, Jason and Rost, Fabian and Pearce, Alex and Geiger, Lukas and Wiedemann, Bernhard M. and Gohlke, Christoph and Gonzalo and Drotleff, Jonas and Eschle, Jonas and Neste, Ludwig and Gorelli, Marco Edward and Baak, Max and Zapata, Omar and Odidev},
date = {2021-08-15},
doi = {10.5281/ZENODO.3949207},
abstract = {See changelog on RTD},
organization = {{Zenodo}},
version = {v2.8.2},
keywords = {iminuit}
}
@article{dietzDirectedFactorGraph2022,
title = {Directed Factor Graph Notation for Generative Models},
author = {Dietz, Laura},
date = {2022-11-02},
abstract = {We introduce the directed factor graph notation, a visual language for specifying the generative process of a probabilistic model. In contrast to boiler plate diagrams, directed factor graphs provide more information about the generative process, allowing to judge the complexity of the model at a glance.}
}
@article{flagelUnreasonableEffectivenessConvolutional2018,
title = {The {{Unreasonable Effectiveness}} of {{Convolutional Neural Networks}} in {{Population Genetic Inference}}},
author = {Flagel, Lex and Brandvain, Yaniv J and Schrider, Daniel R},
date = {2018-11-27},
journaltitle = {bioRxiv},
doi = {10.1101/336073},
abstract = {Population-scale genomic datasets have given researchers incredible amounts of information from which to infer evolutionary histories. Concomitant with this flood of data, theoretical and methodological advances have sought to extract information from genomic sequences to infer demographic events such as population size changes and gene flow among closely related populations/species, construct recombination maps, and uncover loci underlying recent adaptation. To date most methods make use of only one or a few summaries of the input sequences and therefore ignore potentially useful information encoded in the data. The most sophisticated of these approaches involve likelihood calculations, which require theoretical advances for each new problem, and often focus on a single aspect of the data (e.g. only allele frequency information) in the interest of mathematical and computational tractability. Directly interrogating the entirety of the input sequence data in a likelihood-free manner would thus offer a fruitful alternative. Here we accomplish this by representing DNA sequence alignments as images and using a class of deep learning methods called convolutional neural networks (CNNs) to make population genetic inferences from these images. We apply CNNs to a number of evolutionary questions and find that they frequently match or exceed the accuracy of current methods. Importantly, we show that CNNs perform accurate evolutionary model selection and parameter estimation, even on problems that have not received detailed theoretical treatments. Thus, when applied to population genetic alignments, CNN are capable of outperforming expert-derived statistical methods, and offer a new path forward in cases where no likelihood approach exists.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/74GZM445/Flagel et al. - 2018 - The Unreasonable Effectiveness of Convolutional Ne.pdf}
}
@book{gelmanBayesianDataAnalysis2015a,
title = {Bayesian {{Data Analysis}}},
author = {Gelman, Andrew and Carlin, John B. and Stern, Hal S. and Dunson, David B. and Vehtari, Aki and Rubin, Donald B.},
date = {2015-07-06},
edition = {3},
publisher = {{Chapman and Hall/CRC}},
location = {{New York}},
doi = {10.1201/b16018},
abstract = {Winner of the 2016 De Groot Prize from the International Society for Bayesian AnalysisNow in its third edition, this classic book is widely considered the leading text on Bayesian methods, lauded for its accessible, practical approach to analyzing data and solving research problems. Bayesian Data Analysis, Third Edition continues to take an applied},
isbn = {978-0-429-11307-9},
pagetotal = {675}
}
@article{gelmanUnderstandingPredictiveInformation2014,
title = {Understanding Predictive Information Criteria for {{Bayesian}} Models},
author = {Gelman, Andrew and Hwang, Jessica and Vehtari, Aki},
date = {2014-11},
journaltitle = {Statistics and Computing},
shortjournal = {Stat Comput},
volume = {24},
number = {6},
pages = {997--1016},
issn = {0960-3174, 1573-1375},
doi = {10.1007/s11222-013-9416-2},
abstract = {We review the Akaike, deviance, and Watanabe-Akaike information criteria from a Bayesian perspective, where the goal is to estimate expected out-of-sample-prediction error using a biascorrected adjustment of within-sample error. We focus on the choices involved in setting up these measures, and we compare them in three simple examples, one theoretical and two applied. The contribution of this review is to put all these information criteria into a Bayesian predictive context and to better understand, through small examples, how these methods can apply in practice.},
langid = {english},
keywords = {waic},
file = {/Users/michelsen/Zotero/storage/D23ANURG/Gelman et al. - 2014 - Understanding predictive information criteria for .pdf}
}
@article{gelmanUnderstandingPredictiveInformation2014a,
title = {Understanding Predictive Information Criteria for {{Bayesian}} Models},
author = {Gelman, Andrew and Hwang, Jessica and Vehtari, Aki},
date = {2014-11-01},
journaltitle = {Statistics and Computing},
shortjournal = {Stat Comput},
volume = {24},
number = {6},
pages = {997--1016},
issn = {1573-1375},
doi = {10.1007/s11222-013-9416-2},
url = {https://doi.org/10.1007/s11222-013-9416-2},
urldate = {2022-11-02},
abstract = {We review the Akaike, deviance, and Watanabe-Akaike information criteria from a Bayesian perspective, where the goal is to estimate expected out-of-sample-prediction error using a bias-corrected adjustment of within-sample error. We focus on the choices involved in setting up these measures, and we compare them in three simple examples, one theoretical and two applied. The contribution of this paper is to put all these information criteria into a Bayesian predictive context and to better understand, through small examples, how these methods can apply in practice.},
langid = {english},
keywords = {AIC,Bayes,Cross-validation,DIC,Prediction,WAIC},
file = {/Users/michelsen/Zotero/storage/YH794826/gelman2013.pdf.pdf}
}
@online{genomicsBriefHistoryNext2021,
title = {A Brief History of {{Next Generation Sequencing}} ({{NGS}})},
author = {Mobley, Immy},
date = {2021-07-26T11:46:18+00:00},
url = {https://frontlinegenomics.com/a-brief-history-of-next-generation-sequencing-ngs/},
urldate = {2022-11-22},
abstract = {It is now possible to sequence an entire genome in just one day, due to the advent of next generation sequencing (NGS).},
langid = {english},
organization = {{Front Line Genomics}},
file = {/Users/michelsen/Zotero/storage/H2JFWMDM/a-brief-history-of-next-generation-sequencing-ngs.html}
}
@inproceedings{geTuringLanguageFlexible2018,
title = {Turing: {{A Language}} for {{Flexible Probabilistic Inference}}},
shorttitle = {Turing},
booktitle = {Proceedings of the {{Twenty-First International Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
author = {Ge, Hong and Xu, Kai and Ghahramani, Zoubin},
date = {2018-03-31},
pages = {1682--1690},
publisher = {{PMLR}},
issn = {2640-3498},
url = {https://proceedings.mlr.press/v84/ge18b.html},
urldate = {2022-11-16},
abstract = {Probabilistic programming promises to simplify and democratize probabilistic machine learning, but successful probabilistic programming systems require flexible, generic and efficient inference engines. In this work, we present a system called Turing for building MCMC algorithms for probabilistic programming inference. Turing has a very simple syntax and makes full use of the numerical capabilities in the Julia programming language, including all implemented probability distributions, and automatic differentiation. Turing supports a wide range of popular Monte Carlo algorithms, including Hamiltonian Monte Carlo (HMC), HMC with No-U-Turns (NUTS), Gibbs sampling, sequential Monte Carlo (SMC), and several particle MCMC (PMCMC) samplers. Most importantly, Turing inference is composable: it combines MCMC operations on subsets of variables, for example using a combination of an HMC engine and a particle Gibbs (PG) engine. We explore several combinations of inference methods with the aim of finding approaches that are both efficient and universal, i.e. applicable to arbitrary probabilistic models. NUTS—a popular variant of HMC that adapts Hamiltonian simulation path length automatically, although quite powerful for exploring differentiable target distributions, is however not universal. We identify some failure modes for the NUTS engine, and demonstrate that composition of PG (for discrete variables) and NUTS (for continuous variables) can be useful when the NUTS engine is either not applicable, or simply does not work well. Our aim is to present Turing and its composable inference engines to the world and encourage other researchers to build on this system to help advance the field of probabilistic machine learning.},
eventtitle = {International {{Conference}} on {{Artificial Intelligence}} and {{Statistics}}},
langid = {english},
file = {/Users/michelsen/Zotero/storage/GC5T4QAA/Ge et al. - 2018 - Turing A Language for Flexible Probabilistic Infe.pdf}
}
@article{gilbertAssessingAncientDNA2005,
title = {Assessing Ancient {{DNA}} Studies},
author = {Gilbert, M. Thomas P. and Bandelt, Hans-Jürgen and Hofreiter, Michael and Barnes, Ian},
date = {2005-10-01},
journaltitle = {Trends in Ecology \& Evolution},
shortjournal = {Trends in Ecology \& Evolution},
volume = {20},
number = {10},
pages = {541--544},
issn = {0169-5347},
doi = {10.1016/j.tree.2005.07.005},
url = {https://www.sciencedirect.com/science/article/pii/S0169534705002260},
urldate = {2022-11-14},
abstract = {The study of ancient DNA has the potential to make significant and unique contributions to ecology and evolution. However, the techniques used contain inherent problems, particularly with regards to the generation of authentic and useful data. The solution currently advocated to reduce contamination and artefactual results is to adopt criteria for authentication. Nevertheless, these criteria are not foolproof, and we believe that they have, in practice, replaced the use of thought and prudence when designing and executing ancient DNA studies. We argue here that researchers in this field must take a more cognitive and self-critical approach. Specifically, in place of checking criteria off lists, researchers must explain, in sufficient enough detail to dispel doubt, how the data were obtained, and why they should be believed to be authentic.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/Q3SKQL5V/gilbert2005.pdf.pdf;/Users/michelsen/Zotero/storage/YX29XGBI/Gilbert et al. - 2005 - Assessing ancient DNA studies.pdf;/Users/michelsen/Zotero/storage/75LJBXEP/S0169534705002260.html}
}
@article{gillespieExactStochasticSimulation1977,
title = {Exact Stochastic Simulation of Coupled Chemical Reactions},
author = {Gillespie, Daniel T.},
date = {1977-12-01},
journaltitle = {The Journal of Physical Chemistry},
shortjournal = {J. Phys. Chem.},
volume = {81},
number = {25},
pages = {2340--2361},
publisher = {{American Chemical Society}},
issn = {0022-3654},
doi = {10.1021/j100540a008},
url = {https://doi.org/10.1021/j100540a008},
urldate = {2022-11-17},
file = {/Users/michelsen/Zotero/storage/IMY69NTV/gillespie1977.pdf.pdf;/Users/michelsen/Zotero/storage/UP4JKA3N/Gillespie - 1977 - Exact stochastic simulation of coupled chemical re.pdf;/Users/michelsen/Zotero/storage/ZD28CYZP/j100540a008.html}
}
@book{hastieElementsStatisticalLearning2016,
title = {The {{Elements}} of {{Statistical Learning}}},
author = {Hastie, Trevor and Tibshirani, Robert and Friedman, Jerome},
date = {2016},
series = {Springer {{Series}} in {{Statistics}}},
publisher = {{Springer}},
location = {{New York, NY}},
doi = {10.1007/978-0-387-84858-7},
url = {http://link.springer.com/10.1007/978-0-387-84858-7},
urldate = {2022-11-16},
isbn = {978-0-387-84857-0},
keywords = {Averaging,Boosting,classification,clustering,data mining,machine learning,Projection pursuit,Random Forest,supervised learning,Support Vector Machine,unsupervised learning},
file = {/Users/michelsen/Zotero/storage/DPBAT5Q4/Hastie et al. - 2009 - The Elements of Statistical Learning.pdf}
}
@article{heltbergPhysicalObservablesDetermine2021,
title = {Physical Observables to Determine the Nature of Membrane-Less Cellular Sub-Compartments},
author = {Heltberg, Mathias L and Miné-Hattab, Judith and Taddei, Angela and Walczak, Aleksandra M and Mora, Thierry},
editor = {Seminara, Agnese and Faraldo-Gómez, José D and Ronceray, Pierre},
date = {2021-10-22},
journaltitle = {eLife},
volume = {10},
pages = {e69181},
publisher = {{eLife Sciences Publications, Ltd}},
issn = {2050-084X},
doi = {10.7554/eLife.69181},
url = {https://doi.org/10.7554/eLife.69181},
urldate = {2022-11-18},
abstract = {The spatial organization of complex biochemical reactions is essential for the regulation of cellular processes. Membrane-less structures called foci containing high concentrations of specific proteins have been reported in a variety of contexts, but the mechanism of their formation is not fully understood. Several competing mechanisms exist that are difficult to distinguish empirically, including liquid-liquid phase separation, and the trapping of molecules by multiple binding sites. Here, we propose a theoretical framework and outline observables to differentiate between these scenarios from single molecule tracking experiments. In the binding site model, we derive relations between the distribution of proteins, their diffusion properties, and their radial displacement. We predict that protein search times can be reduced for targets inside a liquid droplet, but not in an aggregate of slowly moving binding sites. We use our results to reject the multiple binding site model for Rad52 foci, and find a picture consistent with a liquid-liquid phase separation. These results are applicable to future experiments and suggest different biological roles for liquid droplet and binding site foci.},
keywords = {cellular foci,liquid droplet,liquid-liquid phase separation,membrane-less sub-compartments,polymer binding model},
file = {/Users/michelsen/Zotero/storage/XWUDN5L3/Heltberg et al. - 2021 - Physical observables to determine the nature of me.pdf}
}
@article{heltbergSpatialHeterogeneityAffects2022a,
title = {Spatial Heterogeneity Affects Predictions from Early-Curve Fitting of Pandemic Outbreaks: A Case Study Using Population Data from {{Denmark}}},
author = {Heltberg, Mathias Spliid and Michelsen, Christian and Martiny, Emil S. and Christensen, Lasse Engbo and Jensen, Mogens H. and Halasa, Tariq and Petersen, Troels C.},
date = {2022-09-14},
journaltitle = {Royal Society Open Science},
volume = {9},
number = {9},
publisher = {{The Royal Society Publishing}},
issn = {2054-5703},
doi = {10.1098/rsos.220018},
abstract = {The modelling of pandemics has become a critical aspect in modern society. Even though artificial intelligence can help the forecast, the implementation of ordinary differential equations which estimate the time development in the number of susceptible, (exposed), infected and recovered (SIR/SEIR) individuals is still important in order to understand the stage of the pandemic. These models are based on simplified assumptions which constitute approximations, but to what extent these are erroneous is not understood since many factors can affect the development. In this paper, we introduce an agent-based model including spatial clustering and heterogeneities in connectivity and infection strength. Based on Danish population data, we estimate how this impacts the early prediction of a pandemic and compare this to the long-term development. Our results show that early phase SEIR model predictions overestimate the peak number of infected and the equilibrium level by at least a factor of two. These results are robust to variations of parameters influencing connection distances and independent of the distribution of infection rates.},
langid = {english},
keywords = {agent-based modelling,COVID-19,fitting,pandemics,spatial heterogeneity}
}
@incollection{hethcoteThreeBasicEpidemiological1989,
title = {Three {{Basic Epidemiological Models}}},
booktitle = {Applied {{Mathematical Ecology}}},
author = {Hethcote, Herbert W.},
editor = {Levin, Simon A. and Hallam, Thomas G. and Gross, Louis J.},
date = {1989},
series = {Biomathematics},
pages = {119--144},
publisher = {{Springer}},
location = {{Berlin, Heidelberg}},
doi = {10.1007/978-3-642-61317-3_5},
url = {https://doi.org/10.1007/978-3-642-61317-3_5},
urldate = {2022-11-17},
abstract = {There are three basic types of deterministic models for infectious diseases which are spread by direct person-to-person contact in a population. Here these simplest models are formulated as initial value problems for systems of ordinary differential equations and are analysed mathematically. Theorems are stated regarding the asymptotic stability regions for the equilibrium points and phase plane portraits of solution paths are presented. Parameters are estimated for various diseases and are used to compare the vaccination levels necessary for herd immunity for these diseases. Although the three models presented are simple and their mathematical analyses are elementary, these models provide notation, concepts, intuition and foundation for considering more refined models. Some possible refinements are disease-related factors such as the infectious agent, mode of transmission, latent period, infectious period, susceptibility and resistance, but also social, cultural, Ecology by providing a sound intuitive understanding and complete proofs for the three most basic epidemiological models for microparasitic infections.},
isbn = {978-3-642-61317-3},
langid = {english},
keywords = {Endemic Equilibrium,Epidemic Model,Equilibrium Point,Herd Immunity,Initial Value Problem}
}
@article{homanNoUturnSamplerAdaptively2014,
title = {The {{No-U-turn}} Sampler: Adaptively Setting Path Lengths in {{Hamiltonian Monte Carlo}}},
shorttitle = {The {{No-U-turn}} Sampler},
author = {Hoffman, Matthew D. and Gelman, Andrew},
date = {2014-01-01},
journaltitle = {The Journal of Machine Learning Research},
shortjournal = {J. Mach. Learn. Res.},
volume = {15},
number = {1},
pages = {1593--1623},
issn = {1532-4435},
abstract = {Hamiltonian Monte Carlo (HMC) is a Markov chain Monte Carlo (MCMC) algorithm that avoids the random walk behavior and sensitivity to correlated parameters that plague many MCMC methods by taking a series of steps informed by first-order gradient information. These features allow it to converge to high-dimensional target distributions much more quickly than simpler methods such as random walk Metropolis or Gibbs sampling. However, HMC's performance is highly sensitive to two user-specified parameters: a step size ε and a desired number of steps L. In particular, if L is too small then the algorithm exhibits undesirable random walk behavior, while if L is too large the algorithm wastes computation. We introduce the No-U-Turn Sampler (NUTS), an extension to HMC that eliminates the need to set a number of steps L. NUTS uses a recursive algorithm to build a set of likely candidate points that spans a wide swath of the target distribution, stopping automatically when it starts to double back and retrace its steps. Empirically, NUTS performs at least as efficiently as (and sometimes more efficiently than) a well tuned standard HMC method, without requiring user intervention or costly tuning runs. We also derive a method for adapting the step size parameter ε on the fly based on primal-dual averaging. NUTS can thus be used with no hand-tuning at all, making it suitable for applications such as BUGS-style automatic inference engines that require efficient "turnkey" samplers.},
keywords = {adaptive Monte Carlo,Bayesian inference,dual averaging,Hamiltonian Monte Carlo,Markov chain Monte Carlo},
file = {/Users/michelsen/Zotero/storage/FN4ZBJNS/Homan and Gelman - 2014 - The No-U-turn sampler adaptively setting path len.pdf}
}
@article{jonssonMapDamage2FastApproximate2013,
title = {{{mapDamage2}}.0: Fast Approximate {{Bayesian}} Estimates of Ancient {{DNA}} Damage Parameters},
shorttitle = {{{mapDamage2}}.0},
author = {Jónsson, Hákon and Ginolhac, Aurélien and Schubert, Mikkel and Johnson, Philip L. F. and Orlando, Ludovic},
date = {2013-07},
journaltitle = {Bioinformatics},
volume = {29},
number = {13},
pages = {1682--1684},
issn = {1367-4803, 1460-2059},
doi = {10.1093/bioinformatics/btt193},
abstract = {Motivation: Ancient DNA (aDNA) molecules in fossilized bones and teeth, coprolites, sediments, mummified specimens and museum collections represent fantastic sources of information for evolutionary biologists, revealing the agents of past epidemics and the dynamics of past populations. However, the analysis of aDNA generally faces two major issues. Firstly, sequences consist of a mixture of endogenous and various exogenous backgrounds, mostly microbial. Secondly, high nucleotide misincorporation rates can be observed as a result of severe post-mortem DNA damage. Such misincorporation patterns are instrumental to authenticate ancient sequences versus modern contaminants. We recently developed the user-friendly mapDamage package that identifies such patterns from next-generation sequencing (NGS) sequence datasets. The absence of formal statistical modeling of the DNA damage process, however, precluded rigorous quantitative comparisons across samples.},
langid = {english},
keywords = {mapDamage},
file = {/Users/michelsen/Zotero/storage/ADBPC7SX/Jónsson et al. - 2013 - mapDamage2.0 fast approximate Bayesian estimates .pdf}
}
@article{kermackContributionMathematicalTheory1927,
title = {A Contribution to the Mathematical Theory of Epidemics},
author = {Kermack, William Ogilvy and McKendrick, A. G.},
date = {1927-08},
journaltitle = {Proceedings of the Royal Society of London. Series A, Containing Papers of a Mathematical and Physical Character},
volume = {115},
number = {772},
pages = {700--721},
publisher = {{Royal Society}},
doi = {10.1098/rspa.1927.0118},
url = {https://royalsocietypublishing.org/doi/10.1098/rspa.1927.0118},
urldate = {2022-11-17},
abstract = {(1) One of the most striking features in the study of epidemics is the difficulty of finding a causal factor which appears to be adequate to account for the magnitude of the frequent epidemics of disease which visit almost every population. It was with a view to obtaining more insight regarding the effects of the various factors which govern the spread of contagious epidemics that the present investigation was undertaken. Reference may here be made to the work of Ross and Hudson (1915-17) in which the same problem is attacked. The problem is here carried to a further stage, and it is considered from a point of view which is in one sense more general. The problem may be summarised as follows: One (or more) infected person is introduced into a community of individuals, more or less susceptible to the disease in question. The disease spreads from the affected to the unaffected by contact infection. Each infected person runs through the course of his sickness, and finally is removed from the number of those who are sick, by recovery or by death. The chances of recovery or death vary from day to day during the course of his illness. The chances that the affected may convey infection to the unaffected are likewise dependent upon the stage of the sickness. As the epidemic spreads, the number of unaffected members of the community becomes reduced. Since the course of an epidemic is short compared with the life of an individual, the population may be considered as remaining constant, except in as far as it is modified by deaths due to the epidemic disease itself. In the course of time the epidemic may come to an end. One of the most important problems in epidemiology is to ascertain whether this termination occurs only when no susceptible individuals are left, or whether the interplay of the various factors of infectivity, recovery and mortality, may result in termination, whilst many susceptible individuals are still present in the unaffected population. It is difficult to treat this problem in its most general aspect. In the present communication discussion will be limited to the case in which all members of the community are initially equally susceptible to the disease, and it will be further assumed that complete immunity is conferred by a single infection.},
file = {/Users/michelsen/Zotero/storage/D8W6ATJM/kermack1927.pdf.pdf;/Users/michelsen/Zotero/storage/FRPQY5W8/Kermack et al. - 1927 - A contribution to the mathematical theory of epide.pdf}
}
@unpublished{killoranGeneratingDesigningDNA2017,
title = {Generating and Designing {{DNA}} with Deep Generative Models},
author = {Killoran, Nathan and Lee, Leo J. and Delong, Andrew and Duvenaud, David and Frey, Brendan J.},
date = {2017-12-17},
eprint = {1712.06148},
eprinttype = {arxiv},
primaryclass = {cs, q-bio, stat},
url = {http://arxiv.org/abs/1712.06148},
urldate = {2019-10-01},
abstract = {We propose generative neural network methods to generate DNA sequences and tune them to have desired properties. We present three approaches: creating synthetic DNA sequences using a generative adversarial network; a DNA-based variant of the activation maximization ("deep dream") design method; and a joint procedure which combines these two approaches together. We show that these tools capture important structures of the data and, when applied to designing probes for protein binding microarrays, allow us to generate new sequences whose properties are estimated to be superior to those found in the training data. We believe that these results open the door for applying deep generative models to advance genomics research.},
archiveprefix = {arXiv},
keywords = {Computer Science - Machine Learning,dna,GAN,Quantitative Biology - Genomics,Statistics - Machine Learning},
file = {/Users/michelsen/Zotero/storage/H96ZJIXN/Killoran et al. - 2017 - Generating and designing DNA with deep generative .pdf;/Users/michelsen/Zotero/storage/4ICGP6CQ/1712.html}
}
@article{korneliussenANGSDAnalysisNext2014,
title = {{{ANGSD}}: {{Analysis}} of {{Next Generation Sequencing Data}}},
shorttitle = {{{ANGSD}}},
author = {Korneliussen, Thorfinn Sand and Albrechtsen, Anders and Nielsen, Rasmus},
date = {2014-11-25},
journaltitle = {BMC Bioinformatics},
shortjournal = {BMC Bioinformatics},
volume = {15},
number = {1},
pages = {356},
issn = {1471-2105},
doi = {10.1186/s12859-014-0356-4},
abstract = {High-throughput DNA sequencing technologies are generating vast amounts of data. Fast, flexible and memory efficient implementations are needed in order to facilitate analyses of thousands of samples simultaneously.},
file = {/Users/michelsen/Zotero/storage/RVY8WNI4/Korneliussen et al. - 2014 - ANGSD Analysis of Next Generation Sequencing Data.pdf;/Users/michelsen/Zotero/storage/4QCYKUQB/s12859-014-0356-4.html}
}
@article{krauseCompleteMitochondrialDNA2010,
title = {The Complete Mitochondrial {{DNA}} Genome of an Unknown Hominin from Southern {{Siberia}}},
author = {Krause, Johannes and Fu, Qiaomei and Good, Jeffrey M. and Viola, Bence and Shunkov, Michael V. and Derevianko, Anatoli P. and Pääbo, Svante},
date = {2010-04},
journaltitle = {Nature},
volume = {464},
number = {7290},
pages = {894--897},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/nature08976},
abstract = {Ancient mitochondrial DNA from a hominin individual who lived in the Altai Mountains in Southern Siberia between 48,000 and 30,000 years ago has been sequenced (http://go.nature.com/sokd1F for News story). Comparative genomics suggest that this mtDNA derives from an out-of-Africa migration distinct from those that gave rise to Neanderthals and modern humans. The stratigraphy of the Denisova Cave where the bone — part of the fifth 'little finger' digit — was excavated in 2008, suggests that this hominin lived close geographically to Neanderthals and modern humans, and at the same time. Taken with the presence of Homo floresiensis in Indonesia about 17,000 years ago, this discovery suggests that multiple late Pleistocene hominin lineages coexisted for long periods of time in Eurasia.},
issue = {7290},
langid = {english},
keywords = {Anthropology,Genomics,Mitochondrial genome},
file = {/Users/michelsen/Zotero/storage/7CA6P58N/Krause et al. - 2010 - The complete mitochondrial DNA genome of an unknow.pdf;/Users/michelsen/Zotero/storage/NF3GMGWB/krause2010.pdf.pdf;/Users/michelsen/Zotero/storage/N2L4XIDG/nature08976.html}
}
@article{krogerAnalyticalSolutionSIRmodel2020,
title = {Analytical Solution of the {{SIR-model}} for the Temporal Evolution of Epidemics. {{Part A}}: Time-Independent Reproduction Factor},
shorttitle = {Analytical Solution of the {{SIR-model}} for the Temporal Evolution of Epidemics. {{Part A}}},
author = {Kröger, M. and Schlickeiser, R.},
date = {2020-11-18},
journaltitle = {Journal of Physics A: Mathematical and Theoretical},
shortjournal = {J. Phys. A: Math. Theor.},
volume = {53},
number = {50},
pages = {505601},
issn = {1751-8113, 1751-8121},
doi = {10.1088/1751-8121/abc65d},
url = {https://iopscience.iop.org/article/10.1088/1751-8121/abc65d},
urldate = {2022-11-17},
abstract = {We revisit the susceptible-infectious-recovered/removed (SIR) model which is one of the simplest compartmental models. Many epidemiological models are derivatives of this basic form. While an analytic solution to the SIR model is known in parametric form for the case of a time-independent infection rate, we derive an analytic solution for the more general case of a time-dependent infection rate, that is not limited to a certain range of parameter values. Our approach allows us to derive several exact analytic results characterizing all quantities, and moreover explicit, non-parametric, and accurate analytic approximants for the solution of the SIR model for time-independent infection rates. We relate all parameters of the SIR model to a measurable, usually reported quantity, namely the cumulated number of infected population and its first and second derivatives at an initial time t = 0, where data is assumed to be available. We address the question of how well the differential rate of infections is captured by the Gauss model (GM). To this end we calculate the peak height, width, and position of the bell-shaped rate analytically. We find that the SIR is captured by the GM within a range of times, which we discuss in detail. We prove that the SIR model exhibits an asymptotic behavior at large times that is different from the logistic model, while the difference between the two models still decreases with increasing reproduction factor. This part A of our work treats the original SIR model to hold at all times, while this assumption will be relaxed in part B. Relaxing this assumption allows us to formulate initial conditions incompatible with the original SIR model.},
langid = {english}
}
@inproceedings{lamNumbaLLVMbasedPython2015,
title = {Numba: A {{LLVM-based Python JIT}} Compiler},
shorttitle = {Numba},
booktitle = {Proceedings of the {{Second Workshop}} on the {{LLVM Compiler Infrastructure}} in {{HPC}}},
author = {Lam, Siu Kwan and Pitrou, Antoine and Seibert, Stanley},
date = {2015-11-15},
series = {{{LLVM}} '15},
pages = {1--6},
publisher = {{Association for Computing Machinery}},
location = {{New York, NY, USA}},
doi = {10.1145/2833157.2833162},
url = {https://github.com/numba/numba},
abstract = {Dynamic, interpreted languages, like Python, are attractive for domain-experts and scientists experimenting with new ideas. However, the performance of the interpreter is often a barrier when scaling to larger data sets. This paper presents a just-in-time compiler for Python that focuses on scientific and array-oriented computing. Starting with the simple syntax of Python, Numba compiles a subset of the language into efficient machine code that is comparable in performance to a traditional compiled language. In addition, we share our experience in building a JIT compiler using LLVM[1].},
isbn = {978-1-4503-4005-2},
keywords = {compiler,LLVM,numba,Python},
file = {/Users/michelsen/Zotero/storage/UDC9TQRX/Lam et al. - 2015 - Numba a LLVM-based Python JIT compiler.pdf}
}
@article{liInferenceHumanPopulation2011,
title = {Inference of Human Population History from Individual Whole-Genome Sequences},
author = {Li, Heng and Durbin, Richard},
date = {2011-07},
journaltitle = {Nature},
volume = {475},
number = {7357},
pages = {493--496},
issn = {1476-4687},
doi = {10.1038/nature10231},
abstract = {The history of human population size is important for understanding human evolution. Various studies1,2,3,4,5 have found evidence for a founder event (bottleneck) in East Asian and European populations, associated with the human dispersal out-of-Africa event around 60 thousand years (kyr) ago. However, these studies have had to assume simplified demographic models with few parameters, and they do not provide a precise date for the start and stop times of the bottleneck. Here, with fewer assumptions on population size changes, we present a more detailed history of human population sizes between approximately ten thousand and a million years ago, using the pairwise sequentially Markovian coalescent model applied to the complete diploid genome sequences of a Chinese male (YH)6, a Korean male (SJK)7, three European individuals (J. C. Venter8, NA12891 and NA12878 (ref. 9)) and two Yoruba males (NA18507 (ref. 10) and NA19239). We infer that European and Chinese populations had very similar population-size histories before 10–20 kyr ago. Both populations experienced a severe bottleneck 10–60 kyr ago, whereas African populations experienced a milder bottleneck from which they recovered earlier. All three populations have an elevated effective population size between 60 and 250 kyr ago, possibly due to population substructure11. We also infer that the differentiation of genetically modern humans may have started as early as 100–120 kyr ago12, but considerable genetic exchanges may still have occurred until 20–40 kyr ago.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/F6P22Y3N/Li and Durbin - 2011 - Inference of human population history from individ.pdf;/Users/michelsen/Zotero/storage/LLCR4JBW/nature10231.html}
}
@article{lundbergExplainableMachinelearningPredictions2018a,
title = {Explainable Machine-Learning Predictions for the Prevention of Hypoxaemia during Surgery},
author = {Lundberg, Scott M. and Nair, Bala and Vavilala, Monica S. and Horibe, Mayumi and Eisses, Michael J. and Adams, Trevor and Liston, David E. and Low, Daniel King-Wai and Newman, Shu-Fang and Kim, Jerry and Lee, Su-In},
date = {2018-10},
journaltitle = {Nature Biomedical Engineering},
shortjournal = {Nat Biomed Eng},
volume = {2},
number = {10},
pages = {749--760},
publisher = {{Nature Publishing Group}},
issn = {2157-846X},
doi = {10.1038/s41551-018-0304-0},
url = {https://www.nature.com/articles/s41551-018-0304-0},
urldate = {2022-11-17},
abstract = {Although anaesthesiologists strive to avoid hypoxaemia during surgery, reliably predicting future intraoperative hypoxaemia is not possible at present. Here, we report the development and testing of a machine-learning-based system that predicts the risk of hypoxaemia and provides explanations of the risk factors in real time during general anaesthesia. The system, which was trained on minute-by-minute data from the electronic medical records of over 50,000 surgeries, improved the performance of anaesthesiologists by providing interpretable hypoxaemia risks and contributing factors. The explanations for the predictions are broadly consistent with the literature and with prior knowledge from anaesthesiologists. Our results suggest that if anaesthesiologists currently anticipate 15\% of hypoxaemia events, with the assistance of this system they could anticipate 30\%, a large portion of which may benefit from early intervention because they are associated with modifiable factors. The system can help improve the clinical understanding of hypoxaemia risk during anaesthesia care by providing general insights into the exact changes in risk induced by certain characteristics of the patient or procedure.},
issue = {10},
langid = {english},
keywords = {Computational science,Health care},
file = {/Users/michelsen/Zotero/storage/LTLWX2YG/lundberg2018.pdf.pdf;/Users/michelsen/Zotero/storage/YN3RSEG4/Lundberg et al. - 2018 - Explainable machine-learning predictions for the p.pdf;/Users/michelsen/Zotero/storage/NVHGE7HG/s41551-018-0304-0.html}
}
@article{lundbergLocalExplanationsGlobal2020,
title = {From Local Explanations to Global Understanding with Explainable {{AI}} for Trees},
author = {Lundberg, Scott M. and Erion, Gabriel and Chen, Hugh and DeGrave, Alex and Prutkin, Jordan M. and Nair, Bala and Katz, Ronit and Himmelfarb, Jonathan and Bansal, Nisha and Lee, Su-In},
date = {2020-01},
journaltitle = {Nature Machine Intelligence},
shortjournal = {Nat Mach Intell},
volume = {2},
number = {1},
pages = {56--67},
publisher = {{Nature Publishing Group}},
issn = {2522-5839},
doi = {10.1038/s42256-019-0138-9},
url = {https://www.nature.com/articles/s42256-019-0138-9},
urldate = {2022-11-17},
abstract = {Tree-based machine learning models such as random forests, decision trees and gradient boosted trees are popular nonlinear predictive models, yet comparatively little attention has been paid to explaining their predictions. Here we improve the interpretability of tree-based models through three main contributions. (1) A polynomial time algorithm to compute optimal explanations based on game theory. (2) A new type of explanation that directly measures local feature interaction effects. (3) A new set of tools for understanding global model structure based on combining many local explanations of each prediction. We apply these tools to three medical machine learning problems and show how combining many high-quality local explanations allows us to represent global structure while retaining local faithfulness to the original model. These tools enable us to (1) identify high-magnitude but low-frequency nonlinear mortality risk factors in the US population, (2) highlight distinct population subgroups with shared risk characteristics, (3) identify nonlinear interaction effects among risk factors for chronic kidney disease and (4) monitor a machine learning model deployed in a hospital by identifying which features are degrading the model’s performance over time. Given the popularity of tree-based machine learning models, these improvements to their interpretability have implications across a broad set of domains.},
issue = {1},
langid = {english},
keywords = {Computer science,Medical research,Software},
file = {/Users/michelsen/Zotero/storage/G3S6ZF2S/lundberg2020.pdf.pdf;/Users/michelsen/Zotero/storage/P9BX6BD4/Lundberg et al. - 2020 - From local explanations to global understanding wi.pdf}
}
@inproceedings{lundbergUnifiedApproachInterpreting2017,
title = {A {{Unified Approach}} to {{Interpreting Model Predictions}}},
booktitle = {Advances in {{Neural Information Processing Systems}}},
author = {Lundberg, Scott M and Lee, Su-In},
date = {2017},
volume = {30},
publisher = {{Curran Associates, Inc.}},
url = {https://proceedings.neurips.cc/paper/2017/hash/8a20a8621978632d76c43dfd28b67767-Abstract.html},
urldate = {2022-11-17},
abstract = {Understanding why a model makes a certain prediction can be as crucial as the prediction's accuracy in many applications. However, the highest accuracy for large modern datasets is often achieved by complex models that even experts struggle to interpret, such as ensemble or deep learning models, creating a tension between accuracy and interpretability. In response, various methods have recently been proposed to help users interpret the predictions of complex models, but it is often unclear how these methods are related and when one method is preferable over another. To address this problem, we present a unified framework for interpreting predictions, SHAP (SHapley Additive exPlanations). SHAP assigns each feature an importance value for a particular prediction. Its novel components include: (1) the identification of a new class of additive feature importance measures, and (2) theoretical results showing there is a unique solution in this class with a set of desirable properties. The new class unifies six existing methods, notable because several recent methods in the class lack the proposed desirable properties. Based on insights from this unification, we present new methods that show improved computational performance and/or better consistency with human intuition than previous approaches.},
file = {/Users/michelsen/Zotero/storage/DXD675E2/Lundberg and Lee - 2017 - A Unified Approach to Interpreting Model Predictio.pdf}
}
@article{makComparativePerformanceBGISEQ5002017,
title = {Comparative Performance of the {{BGISEQ-500}} vs {{Illumina HiSeq2500}} Sequencing Platforms for Palaeogenomic Sequencing},
author = {Mak, Sarah Siu Tze and Gopalakrishnan, Shyam and Carøe, Christian and Geng, Chunyu and Liu, Shanlin and Sinding, Mikkel-Holger S and Kuderna, Lukas F K and Zhang, Wenwei and Fu, Shujin and Vieira, Filipe G and Germonpré, Mietje and Bocherens, Hervé and Fedorov, Sergey and Petersen, Bent and Sicheritz-Pontén, Thomas and Marques-Bonet, Tomas and Zhang, Guojie and Jiang, Hui and Gilbert, M Thomas P},
date = {2017-06-26},
journaltitle = {GigaScience},
shortjournal = {Gigascience},
volume = {6},
number = {8},
eprint = {28854615},
eprinttype = {pmid},
pages = {1--13},
issn = {2047-217X},
doi = {10.1093/gigascience/gix049},
abstract = {Ancient DNA research has been revolutionized following development of next-generation sequencing platforms. Although a number of such platforms have been applied to ancient DNA samples, the Illumina series are the dominant choice today, mainly because of high production capacities and short read production. Recently a potentially attractive alternative platform for palaeogenomic data generation has been developed, the BGISEQ-500, whose sequence output are comparable with the Illumina series. In this study, we modified the standard BGISEQ-500 library preparation specifically for use on degraded DNA, then directly compared the sequencing performance and data quality of the BGISEQ-500 to the Illumina HiSeq2500 platform on DNA extracted from 8 historic and ancient dog and wolf samples. The data generated were largely comparable between sequencing platforms, with no statistically significant difference observed for parameters including level (P = 0.371) and average sequence length (P = 0.718) of endogenous nuclear DNA, sequence GC content (P = 0.311), double-stranded DNA damage rate (P = 0.309), and sequence clonality (P = 0.093). Small significant differences were found in single-strand DNA damage rate (δS; slightly lower for the BGISEQ-500, P = 0.011) and the background rate of difference from the reference genome (θ; slightly higher for BGISEQ-500, P = 0.012). This may result from the differences in amplification cycles used to polymerase chain reaction–amplify the libraries. A significant difference was also observed in the mitochondrial DNA percentages recovered (P = 0.018), although we believe this is likely a stochastic effect relating to the extremely low levels of mitochondria that were sequenced from 3 of the samples with overall very low levels of endogenous DNA. Although we acknowledge that our analyses were limited to animal material, our observations suggest that the BGISEQ-500 holds the potential to represent a valid and potentially valuable alternative platform for palaeogenomic data generation that is worthy of future exploration by those interested in the sequencing and analysis of degraded DNA.},
pmcid = {PMC5570000},
file = {/Users/michelsen/Zotero/storage/X3XQ9ILX/Mak et al. - 2017 - Comparative performance of the BGISEQ-500 vs Illum.pdf}
}
@article{manleyHighdensityMappingSinglemolecule2008,
title = {High-Density Mapping of Single-Molecule Trajectories with Photoactivated Localization Microscopy},
author = {Manley, Suliana and Gillette, Jennifer M. and Patterson, George H. and Shroff, Hari and Hess, Harald F. and Betzig, Eric and Lippincott-Schwartz, Jennifer},
date = {2008-02},
journaltitle = {Nature Methods},
shortjournal = {Nat Methods},
volume = {5},
number = {2},
pages = {155--157},
publisher = {{Nature Publishing Group}},
issn = {1548-7105},
doi = {10.1038/nmeth.1176},
url = {https://www.nature.com/articles/nmeth.1176},
urldate = {2022-11-18},
abstract = {We combined photoactivated localization microscopy (PALM) with live-cell single-particle tracking to create a new method termed sptPALM. We created spatially resolved maps of single-molecule motions by imaging the membrane proteins Gag and VSVG, and obtained several orders of magnitude more trajectories per cell than traditional single-particle tracking enables. By probing distinct subsets of molecules, sptPALM can provide insight into the origins of spatial and temporal heterogeneities in membranes.},
issue = {2},
langid = {english},
keywords = {Bioinformatics,Biological Microscopy,Biological Techniques,Biomedical Engineering/Biotechnology,general,Life Sciences,Proteomics},
file = {/Users/michelsen/Zotero/storage/C97CFIWV/manley2008.pdf.pdf;/Users/michelsen/Zotero/storage/EJSILYJH/Manley et al. - 2008 - High-density mapping of single-molecule trajectori.pdf;/Users/michelsen/Zotero/storage/B47CW9RP/nmeth.html}
}
@article{martinianoRemovingReferenceBias2020,
title = {Removing Reference Bias and Improving Indel Calling in Ancient {{DNA}} Data Analysis by Mapping to a Sequence Variation Graph},
author = {Martiniano, Rui and Garrison, Erik and Jones, Eppie R. and Manica, Andrea and Durbin, Richard},
date = {2020-09-17},
journaltitle = {Genome Biology},
shortjournal = {Genome Biology},
volume = {21},
number = {1},
pages = {250},
issn = {1474-760X},
doi = {10.1186/s13059-020-02160-7},
abstract = {During the last decade, the analysis of ancient DNA (aDNA) sequence has become a powerful tool for the study of past human populations. However, the degraded nature of aDNA means that aDNA molecules are short and frequently mutated by post-mortem chemical modifications. These features decrease read mapping accuracy and increase reference bias, in which reads containing non-reference alleles are less likely to be mapped than those containing reference alleles. Alternative approaches have been developed to replace the linear reference with a variation graph which includes known alternative variants at each genetic locus. Here, we evaluate the use of variation graph software vg to avoid reference bias for aDNA and compare with existing methods.},
keywords = {Ancient DNA,Reference bias,Sequence alignment,Variation graph},
file = {/Users/michelsen/Zotero/storage/H4XE7YZQ/Martiniano et al. - 2020 - Removing reference bias and improving indel callin.pdf;/Users/michelsen/Zotero/storage/KAVVBHIH/[email protected];/Users/michelsen/Zotero/storage/I8MAVB8Z/s13059-020-02160-7.html}
}
@book{mcelreathStatisticalRethinkingBayesian2020,
title = {Statistical Rethinking: A {{Bayesian}} Course with Examples in {{R}} and {{Stan}}},
shorttitle = {Statistical Rethinking},
author = {McElreath, Richard},
date = {2020},
series = {{{CRC}} Texts in Statistical Science},
edition = {2},
publisher = {{Taylor and Francis, CRC Press}},
location = {{Boca Raton}},
abstract = {"Statistical Rethinking: A Bayesian Course with Examples in R and Stan, Second Edition builds knowledge/confidence in statistical modeling. Pushes readers to perform step-by-step calculations (usually automated.) Unique, computational approach ensures readers understand details to make reasonable choices and interpretations in their modeling work"--},
isbn = {978-0-367-13991-9}
}
@article{mckennaGenomeAnalysisToolkit2010,
title = {The {{Genome Analysis Toolkit}}: A {{MapReduce}} Framework for Analyzing Next-Generation {{DNA}} Sequencing Data},
shorttitle = {The {{Genome Analysis Toolkit}}},
author = {McKenna, Aaron and Hanna, Matthew and Banks, Eric and Sivachenko, Andrey and Cibulskis, Kristian and Kernytsky, Andrew and Garimella, Kiran and Altshuler, David and Gabriel, Stacey and Daly, Mark and DePristo, Mark A.},
date = {2010-09},
journaltitle = {Genome Research},
shortjournal = {Genome Res},
volume = {20},
number = {9},
eprint = {20644199},
eprinttype = {pmid},
pages = {1297--1303},
issn = {1549-5469},
doi = {10.1101/gr.107524.110},
abstract = {Next-generation DNA sequencing (NGS) projects, such as the 1000 Genomes Project, are already revolutionizing our understanding of genetic variation among individuals. However, the massive data sets generated by NGS--the 1000 Genome pilot alone includes nearly five terabases--make writing feature-rich, efficient, and robust analysis tools difficult for even computationally sophisticated individuals. Indeed, many professionals are limited in the scope and the ease with which they can answer scientific questions by the complexity of accessing and manipulating the data produced by these machines. Here, we discuss our Genome Analysis Toolkit (GATK), a structured programming framework designed to ease the development of efficient and robust analysis tools for next-generation DNA sequencers using the functional programming philosophy of MapReduce. The GATK provides a small but rich set of data access patterns that encompass the majority of analysis tool needs. Separating specific analysis calculations from common data management infrastructure enables us to optimize the GATK framework for correctness, stability, and CPU and memory efficiency and to enable distributed and shared memory parallelization. We highlight the capabilities of the GATK by describing the implementation and application of robust, scale-tolerant tools like coverage calculators and single nucleotide polymorphism (SNP) calling. We conclude that the GATK programming framework enables developers and analysts to quickly and easily write efficient and robust NGS tools, many of which have already been incorporated into large-scale sequencing projects like the 1000 Genomes Project and The Cancer Genome Atlas.},
langid = {english},
pmcid = {PMC2928508},
keywords = {Base Sequence,Genome,Genomics,Sequence Analysis; DNA,Software},
file = {/Users/michelsen/Zotero/storage/JMQWTD9P/McKenna et al. - 2010 - The Genome Analysis Toolkit a MapReduce framework.pdf;/Users/michelsen/Zotero/storage/ZGZA63AH/mckenna2010.pdf.pdf}
}
@book{mendelgregorVersucheUberPflanzenhybriden1866,
title = {Versuche Über Pflanzen-Hybriden},
author = {Mendel, Gregor},
date = {1866},
pages = {464},
publisher = {{Im Verlage des Vereines}},
location = {{Brünn}},
url = {https://www.biodiversitylibrary.org/item/124139},
copyright = {NOT\_IN\_COPYRIGHT},
keywords = {Mendel's law,Plant hybridization}
}
@article{meyerNuclearDNASequences2016a,
title = {Nuclear {{DNA}} Sequences from the {{Middle Pleistocene}} {{Sima de los Huesos}} Hominins},
author = {Meyer, Matthias and Arsuaga, Juan-Luis and de Filippo, Cesare and Nagel, Sarah and Aximu-Petri, Ayinuer and Nickel, Birgit and Martínez, Ignacio and Gracia, Ana and de Castro, José María Bermúdez and Carbonell, Eudald and Viola, Bence and Kelso, Janet and Prüfer, Kay and Pääbo, Svante},
options = {useprefix=true},
date = {2016-03},
journaltitle = {Nature},
volume = {531},
number = {7595},
pages = {504--507},
issn = {1476-4687},
doi = {10.1038/nature17405},
abstract = {A unique assemblage of 28 hominin individuals, found in Sima de los Huesos in the Sierra de Atapuerca in Spain, has recently been dated to approximately 430,000 years ago1. An interesting question is how these Middle Pleistocene hominins were related to those who lived in the Late Pleistocene epoch, in particular to Neanderthals in western Eurasia and to Denisovans, a sister group of Neanderthals so far known only from southern Siberia. While the Sima de los Huesos hominins share some derived morphological features with Neanderthals, the mitochondrial genome retrieved from one individual from Sima de los Huesos is more closely related to the mitochondrial DNA of Denisovans than to that of Neanderthals2. However, since the mitochondrial DNA does not reveal the full picture of relationships among populations, we have investigated DNA preservation in several individuals found at Sima de los Huesos. Here we recover nuclear DNA sequences from two specimens, which show that the Sima de los Huesos hominins were related to Neanderthals rather than to Denisovans, indicating that the population divergence between Neanderthals and Denisovans predates 430,000 years ago. A mitochondrial DNA recovered from one of the specimens shares the previously described relationship to Denisovan mitochondrial DNAs, suggesting, among other possibilities, that the mitochondrial DNA gene pool of Neanderthals turned over later in their history.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/4J452WUH/Meyer et al. - 2016 - Nuclear DNA sequences from the Middle Pleistocene .pdf;/Users/michelsen/Zotero/storage/8STA35B9/nature17405.html}
}
@thesis{michelsenPhysicistApproachMachine2020,
title = {A Physicist’s Approach to Machine Learning – Understanding the Basic Bricks},
author = {Michelsen, Christian},
date = {2020},
institution = {{University of Copenhagen}}
}
@article{mullisSpecificEnzymaticAmplification1986,
title = {Specific Enzymatic Amplification of {{DNA}} in Vitro: The Polymerase Chain Reaction},
shorttitle = {Specific Enzymatic Amplification of {{DNA}} in Vitro},
author = {Mullis, K. and Faloona, F. and Scharf, S. and Saiki, R. and Horn, G. and Erlich, H.},
date = {1986},
journaltitle = {Cold Spring Harbor Symposia on Quantitative Biology},
shortjournal = {Cold Spring Harb Symp Quant Biol},
volume = {51 Pt 1},
eprint = {3472723},
eprinttype = {pmid},
pages = {263--273},
issn = {0091-7451},
doi = {10.1101/sqb.1986.051.01.032},
langid = {english},
keywords = {Alleles,Base Sequence,Cell Line,Cloning; Molecular,DNA,DNA-Directed DNA Polymerase,Genes,Humans,Templates; Genetic},
file = {/Users/michelsen/Zotero/storage/6557LKBM/mullis1986.pdf.pdf}
}
@book{murphyMachineLearningProbabilistic2012,
title = {Machine Learning: {{A}} Probabilistic Perspective},
author = {Murphy, Kevin P.},
date = {2012},
publisher = {{The MIT Press}},
abstract = {Today's Web-enabled deluge of electronic data calls for automated methods of data analysis. Machine learning provides these, developing methods that can automatically detect patterns in data and then use the uncovered patterns to predict future data. This textbook offers a comprehensive and self-contained introduction to the field of machine learning, based on a unified, probabilistic approach. The coverage combines breadth and depth, offering necessary background material on such topics as probability, optimization, and linear algebra as well as discussion of recent developments in the field, including conditional random fields, L1 regularization, and deep learning. The book is written in an informal, accessible style, complete with pseudo-code for the most important algorithms. All topics are copiously illustrated with color images and worked examples drawn from such application domains as biology, text processing, computer vision, and robotics. Rather than providing a cookbook of different heuristic methods, the book stresses a principled model-based approach, often using the language of graphical models to specify models in a concise and intuitive way. Almost all the models described have been implemented in a MATLAB software package–PMTK (probabilistic modeling toolkit)–that is freely available online. The book is suitable for upper-level undergraduates with an introductory-level college math background and beginning graduate students.},
isbn = {0-262-01802-0}
}
@incollection{nealMCMCUsingHamiltonian2011,
title = {{{MCMC}} Using {{Hamiltonian}} Dynamics},
booktitle = {Handbook of {{Markov Chain Monte Carlo}}},
author = {Neal, Radford M.},
date = {2011-05-10},
publisher = {{Routledge Handbooks Online}},
doi = {10.1201/b10905-7},
url = {https://www.routledgehandbooks.com/doi/10.1201/b10905-7},
urldate = {2022-11-16},
abstract = {Since their popularization in the 1990s, Markov chain Monte Carlo (MCMC) methods have revolutionized statistical computing and have had an especially profound impact on the practice of Bayesian statistics. Furthermore, MCMC methods have enabled the development and use of intricate models in an astonishing array of disciplines as diverse as fisheries science and economics. The wide-ranging practical importance of MCMC has sparked an expansive and deep investigation into fundamental Markov chain theory. The Handbook of Markov Chain Monte Carlo provides a reference for the broad audience of developers and users of MCMC methodology interested in keeping up with cutting-edge theory and applications. The first half of the book covers MCMC foundations, methodology, and algorithms. The second half considers the use of MCMC in a variety of practical applications including in educational research, astrophysics, brain imaging, ecology, and sociology. The in-depth introductory section of the book allows graduate students and practicing scientists new to MCMC to become thoroughly acquainted with the basic theory, algorithms, and applications. The book supplies detailed examples and case studies of realistic scientific problems presenting the diversity of methods used by the wide-ranging MCMC community. Those familiar with MCMC methods will find this book a useful refresher of current theory and recent developments.},
isbn = {978-1-4200-7941-8 978-1-4200-7942-5},
langid = {english}
}
@unpublished{ngDna2vecConsistentVector2017,
title = {{{dna2vec}}: {{Consistent}} Vector Representations of Variable-Length k-Mers},
shorttitle = {{{dna2vec}}},
author = {Ng, Patrick},
date = {2017-01-23},
eprint = {1701.06279},
eprinttype = {arxiv},
primaryclass = {cs, q-bio, stat},
url = {http://arxiv.org/abs/1701.06279},
urldate = {2019-10-01},
abstract = {One of the ubiquitous representation of long DNA sequence is dividing it into shorter k-mer components. Unfortunately, the straightforward vector encoding of k-mer as a one-hot vector is vulnerable to the curse of dimensionality. Worse yet, the distance between any pair of one-hot vectors is equidistant. This is particularly problematic when applying the latest machine learning algorithms to solve problems in biological sequence analysis. In this paper, we propose a novel method to train distributed representations of variable-length k-mers. Our method is based on the popular word embedding model word2vec, which is trained on a shallow two-layer neural network. Our experiments provide evidence that the summing of dna2vec vectors is akin to nucleotides concatenation. We also demonstrate that there is correlation between Needleman-Wunsch similarity score and cosine similarity of dna2vec vectors.},
archiveprefix = {arXiv},
keywords = {Computer Science - Computation and Language,Computer Science - Machine Learning,dna,dna2vec,embedding,Quantitative Biology - Quantitative Methods,Statistics - Machine Learning},
file = {/Users/michelsen/Zotero/storage/YZQUSFFJ/Ng - 2017 - dna2vec Consistent vector representations of vari.pdf;/Users/michelsen/Zotero/storage/CHA7Y2FC/1701.html}
}
@article{nielsenGenotypeSNPCalling2011,
title = {Genotype and {{SNP}} Calling from Next-Generation Sequencing Data},
author = {Nielsen, Rasmus and Paul, Joshua S. and Albrechtsen, Anders and Song, Yun S.},
date = {2011-06},
journaltitle = {Nature reviews. Genetics},
shortjournal = {Nat Rev Genet},
volume = {12},
number = {6},
eprint = {21587300},
eprinttype = {pmid},
pages = {443--451},
issn = {1471-0056},
doi = {10.1038/nrg2986},
abstract = {Meaningful analysis of next-generation sequencing (NGS) data, which are produced extensively by genetics and genomics studies, relies crucially on the accurate calling of SNPs and genotypes. Recently developed statistical methods both improve and quantify the considerable uncertainty associated with genotype calling, and will especially benefit the growing number of studies using low- to medium-coverage data. We review these methods and provide a guide for their use in NGS studies.},
pmcid = {PMC3593722},
file = {/Users/michelsen/Zotero/storage/BL7KEMGY/nielsen2011.pdf.pdf;/Users/michelsen/Zotero/storage/Q9LNHW3V/Nielsen et al. - 2011 - Genotype and SNP calling from next-generation sequ.pdf}
}
@article{nielsenTracingPeoplingWorld2017,
title = {Tracing the Peopling of the World through Genomics},
author = {Nielsen, Rasmus and Akey, Joshua M. and Jakobsson, Mattias and Pritchard, Jonathan K. and Tishkoff, Sarah and Willerslev, Eske},
date = {2017-01},
journaltitle = {Nature},
volume = {541},
number = {7637},
pages = {302--310},
issn = {1476-4687},
doi = {10.1038/nature21347},
abstract = {Advances in the sequencing and the analysis of the genomes of both modern and ancient peoples have facilitated a number of breakthroughs in our understanding of human evolutionary history. These include the discovery of interbreeding between anatomically modern humans and extinct hominins; the development of an increasingly detailed description of the complex dispersal of modern humans out of Africa and their population expansion worldwide; and the characterization of many of the genetic adaptations of humans to local environmental conditions. Our interpretation of the evolutionary history and adaptation of humans is being transformed by analyses of these new genomic data.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/MQ5Z6BM8/Nielsen et al. - 2017 - Tracing the peopling of the world through genomics.pdf;/Users/michelsen/Zotero/storage/K8I2HX53/nature21347.html}
}
@article{orlandoRecalibratingEquusEvolution2013,
title = {Recalibrating {{Equus}} Evolution Using the Genome Sequence of an Early {{Middle Pleistocene}} Horse},
author = {Orlando, Ludovic and Ginolhac, Aurélien and Zhang, Guojie and Froese, Duane and Albrechtsen, Anders and Stiller, Mathias and Schubert, Mikkel and Cappellini, Enrico and Petersen, Bent and Moltke, Ida and Johnson, Philip L. F. and Fumagalli, Matteo and Vilstrup, Julia T. and Raghavan, Maanasa and Korneliussen, Thorfinn and Malaspinas, Anna-Sapfo and Vogt, Josef and Szklarczyk, Damian and Kelstrup, Christian D. and Vinther, Jakob and Dolocan, Andrei and Stenderup, Jesper and Velazquez, Amhed M. V. and Cahill, James and Rasmussen, Morten and Wang, Xiaoli and Min, Jiumeng and Zazula, Grant D. and Seguin-Orlando, Andaine and Mortensen, Cecilie and Magnussen, Kim and Thompson, John F. and Weinstock, Jacobo and Gregersen, Kristian and Røed, Knut H. and Eisenmann, Véra and Rubin, Carl J. and Miller, Donald C. and Antczak, Douglas F. and Bertelsen, Mads F. and Brunak, Søren and Al-Rasheid, Khaled A. S. and Ryder, Oliver and Andersson, Leif and Mundy, John and Krogh, Anders and Gilbert, M. Thomas P. and Kjær, Kurt and Sicheritz-Ponten, Thomas and Jensen, Lars Juhl and Olsen, Jesper V. and Hofreiter, Michael and Nielsen, Rasmus and Shapiro, Beth and Wang, Jun and Willerslev, Eske},
date = {2013-07},
journaltitle = {Nature},
volume = {499},
number = {7456},
pages = {74--78},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/nature12323},
abstract = {A low-coverage draft genome sequence from a horse bone recovered from permafrost dated to approximately 560–780 thousand years ago is presented; this represents the oldest full genome sequence to date by almost an order of magnitude.},
issue = {7456},
langid = {english},
keywords = {Evolutionary genetics},
file = {/Users/michelsen/Zotero/storage/2WL4MPHI/orlando2013.pdf.pdf;/Users/michelsen/Zotero/storage/ITMZ8KJC/Orlando et al. - 2013 - Recalibrating Equus evolution using the genome seq.pdf;/Users/michelsen/Zotero/storage/RSGRY4SB/nature12323.html}
}
@article{oswaldImagingQuantificationTransmembrane2014,
title = {Imaging and Quantification of Trans-Membrane Protein Diffusion in Living Bacteria},
author = {Oswald, Felix and Bank, Ernst L. M. and Bollen, Yves J. M. and Peterman, Erwin J. G.},
date = {2014-06-05},
journaltitle = {Physical Chemistry Chemical Physics},
shortjournal = {Phys. Chem. Chem. Phys.},
volume = {16},
number = {25},
pages = {12625--12634},
publisher = {{The Royal Society of Chemistry}},
issn = {1463-9084},
doi = {10.1039/C4CP00299G},
url = {https://pubs.rsc.org/en/content/articlelanding/2014/cp/c4cp00299g},
urldate = {2022-11-18},
abstract = {The cytoplasmic membrane forms the barrier between any cell's interior and the outside world. It contains many proteins that enable essential processes such as the transmission of signals, the uptake of nutrients, and cell division. In the case of prokaryotes, which do not contain intracellular membranes, the cytoplasmic membrane also contains proteins for respiration and protein folding. Mutual interactions and specific localization of these proteins depend on two-dimensional diffusion driven by thermal fluctuations. The experimental investigation of membrane–protein diffusion in bacteria is challenging due to their small size, only a few times larger than the resolution of an optical microscope. Here, we review fluorescence microscopy-based methods to study diffusion of membrane proteins in living bacteria. The main focus is on data-analysis tools to extract diffusion coefficients from single-particle tracking data obtained by single-molecule fluorescence microscopy. We introduce a novel approach, IPODD (inverse projection of displacement distributions), to obtain diffusion coefficients from the usually obtained 2-D projected diffusion trajectories of the highly 3-D curved bacterial membrane. This method provides, in contrast to traditional mean-squared-displacement methods, correct diffusion coefficients and allows unravelling of heterogeneously diffusing populations.},
langid = {english},
file = {/Users/michelsen/Zotero/storage/4DQE6SPQ/Oswald et al. - 2014 - Imaging and quantification of trans-membrane prote.pdf;/Users/michelsen/Zotero/storage/F9N2ZSXW/Oswald et al. - 2014 - Imaging and quantification of trans-membrane prote.pdf;/Users/michelsen/Zotero/storage/IGUV532M/oswald2014.pdf.pdf;/Users/michelsen/Zotero/storage/7Y7X6BNC/c4cp00299g.html}
}
@article{paaboMolecularCloningAncient1985,
title = {Molecular Cloning of {{Ancient Egyptian}} Mummy {{DNA}}},
author = {Pääbo, Svante},
date = {1985-04},
journaltitle = {Nature},
volume = {314},
number = {6012},
pages = {644--645},
publisher = {{Nature Publishing Group}},
issn = {1476-4687},
doi = {10.1038/314644a0},
abstract = {Artificial mummification was practised in Egypt from ∼ 2600 BC until the fourth century AD. Because of the dry Egyptian climate, however, there are also many natural mummies preserved from earlier as well as later times. To elucidate whether this unique source of ancient human remains can be used for molecular genetic analyses, 23 mummies were investigated for DNA content. One 2,400-yr-old mummy of a child was found to contain DNA that could be molecularly cloned in a plasmid vector. I report here that one such clone contains two members of the Alu family of human repetitive DNA sequences, as detected by DNA hybridizations and nucleotide sequencing. These analyses show that substantial pieces of mummy DNA (3.4 kilobases) can be cloned and that the DNA fragments seem to contain little or no modifications introduced postmortem.},
issue = {6012},
langid = {english},
keywords = {Humanities and Social Sciences,multidisciplinary,Science},
file = {/Users/michelsen/Zotero/storage/6AUP92PZ/Pääbo - 1985 - Molecular cloning of Ancient Egyptian mummy DNA.pdf;/Users/michelsen/Zotero/storage/GF2JK7A7/[email protected];/Users/michelsen/Zotero/storage/6GKS8RPC/314644a0.html}
}