diff --git a/paper/paper.bib b/paper/paper.bib index 31b3442..85d4071 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -39,7 +39,6 @@ @article{lavigne_automatic_2020 title = {Automatic discovery of chemical reactions using imposed activation}, doi = {10.26434/chemrxiv.13008500.v2}, url = {/articles/preprint/Automatic_discovery_of_chemical_reactions_using_imposed_activation/13008500/1}, - abstract = {Computational power and quantum chemical methods have improved immensely since computers were first applied to the study of reactivity, but the de novo prediction of chemical reactions has remained challenging. We show that complex reactions can be efficiently and autonomously predicted using chemical activation imposed by simple geometrical constraints. Our approach is demonstrated on realistic and challenging chemistry, such as a triple cyclization cascade involved in the total synthesis of a natural product and several oxidative addition reactions of complex drug-like molecules. Notably and in contrast with traditional hand-guided computational chemistry calculations, our method requires minimal human involvement and no prior knowledge of products or mechanisms. Imposed activation can be a transformational tool to screen for chemical reactivity and mechanisms as well as to study byproduct formation and decomposition.}, author = {Lavigne, Cyrille and dos Passos Gomes, Gabriel and Pollice, Robert and Aspuru-Guzik, Alan}, urldate = {2020-09-30}, date = {2020-09-29}, @@ -54,7 +53,6 @@ @article{smith:2020 url = {https://aip.scitation.org/doi/full/10.1063/5.0006002}, doi = {10.1063/5.0006002}, shorttitle = {{PSI}4 1.4}, - abstract = {{PSI}4 is a free and open-source ab initio electronic structure program providing implementations of Hartree–Fock, density functional theory, many-body perturbation theory, configuration interaction, density cumulant theory, symmetry-adapted perturbation theory, and coupled-cluster theory. 
Most of the methods are quite efficient, thanks to density fitting and multi-core parallelism. The program is a hybrid of C++ and Python, and calculations may be run with very simple text files or using the Python {API}, facilitating post-processing and complex workflows; method developers also have access to most of {PSI}4’s core functionalities via Python. Job specification may be passed using The Molecular Sciences Software Institute ({MolSSI}) {QCSCHEMA} data format, facilitating interoperability. A rewrite of our top-level computation driver, and concomitant adoption of the {MolSSI} {QCARCHIVE} {INFRASTRUCTURE} project, makes the latest version of {PSI}4 well suited to distributed computation of large numbers of independent tasks. The project has fostered the development of independent software components that may be reused in other quantum chemistry programs.}, pages = {184108}, number = {18}, journaltitle = {Journal of Chemical Physics}, @@ -65,12 +63,11 @@ @article{smith:2020 } @article{aquilante:2020, - title = {Modern quantum chemistry with [Open]Molcas}, + title = {Modern quantum chemistry with {[Open]Molcas}}, volume = {152}, issn = {0021-9606}, url = {https://aip.scitation.org/doi/full/10.1063/5.0004835}, doi = {10.1063/5.0004835}, - abstract = {{MOLCAS}/{OpenMolcas} is an ab initio electronic structure program providing a large set of computational methods from Hartree–Fock and density functional theory to various implementations of multiconfigurational theory. 
This article provides a comprehensive overview of the main features of the code, specifically reviewing the use of the code in previously reported chemical applications as well as more recent applications including the calculation of magnetic properties from optimized density matrix renormalization group wave functions.}, pages = {214117}, number = {21}, journaltitle = {Journal of Chemical Physics}, @@ -87,7 +84,6 @@ @article{kuhne:2020 url = {https://aip.scitation.org/doi/full/10.1063/5.0007045}, doi = {10.1063/5.0007045}, shorttitle = {{CP}2K}, - abstract = {{CP}2K is an open source electronic structure and molecular dynamics software package to perform atomistic simulations of solid-state, liquid, molecular, and biological systems. It is especially aimed at massively parallel and linear-scaling electronic structure methods and state-of-the-art ab initio molecular dynamics simulations. Excellent performance for electronic structure calculations is achieved using novel algorithms implemented for modern high-performance computing systems. This review revisits the main capabilities of {CP}2K to perform efficient and accurate electronic structure simulations. The emphasis is put on density functional theory and multiple post–Hartree–Fock methods using the Gaussian and plane wave approach and its augmented all-electron extension.}, pages = {194103}, number = {19}, journaltitle = {Journal of Chemical Physics}, @@ -104,7 +100,6 @@ @article{apra:2020 url = {https://aip.scitation.org/doi/full/10.1063/5.0004997}, doi = {10.1063/5.0004997}, shorttitle = {{NWChem}}, - abstract = {Specialized computational chemistry packages have permanently reshaped the landscape of chemical and materials science by providing tools to support and guide experimental efforts and for the prediction of atomistic and electronic properties. 
In this regard, electronic structure packages have played a special role by using first-principle-driven methodologies to model complex chemical and materials processes. Over the past few decades, the rapid development of computing technologies and the tremendous increase in computational power have offered a unique chance to study complex transformations using sophisticated and predictive many-body techniques that describe correlated behavior of electrons in molecular and condensed phase systems at different levels of theory. In enabling these simulations, novel parallel algorithms have been able to take advantage of computational resources to address the polynomial scaling of electronic structure methods. In this paper, we briefly review the {NWChem} computational chemistry suite, including its history, design principles, parallel tools, current capabilities, outreach, and outlook.}, pages = {184102}, number = {18}, journaltitle = {Journal of Chemical Physics}, @@ -120,7 +115,6 @@ @article{barca:2020 issn = {0021-9606}, url = {https://aip.scitation.org/doi/full/10.1063/5.0005188}, doi = {10.1063/5.0005188}, - abstract = {A discussion of many of the recently implemented features of {GAMESS} (General Atomic and Molecular Electronic Structure System) and {LibCChem} (the C++ {CPU}/{GPU} library associated with {GAMESS}) is presented. These features include fragmentation methods such as the fragment molecular orbital, effective fragment potential and effective fragment molecular orbital methods, hybrid {MPI}/{OpenMP} approaches to Hartree–Fock, and resolution of the identity second order perturbation theory. Many new coupled cluster theory methods have been implemented in {GAMESS}, as have multiple levels of density functional/tight binding theory. 
The role of accelerators, especially graphical processing units, is discussed in the context of the new features of {LibCChem}, as it is the associated problem of power consumption as the power of computers increases dramatically. The process by which a complex program suite such as {GAMESS} is maintained and developed is considered. Future developments are briefly summarized.}, pages = {154102}, number = {15}, journaltitle = {Journal of Chemical Physics}, @@ -137,7 +131,6 @@ @article{romero:2020 url = {https://aip.scitation.org/doi/full/10.1063/1.5144261}, doi = {10.1063/1.5144261}, shorttitle = {{ABINIT}}, - abstract = {abinit is probably the first electronic-structure package to have been released under an open-source license about 20 years ago. It implements density functional theory, density-functional perturbation theory ({DFPT}), many-body perturbation theory ({GW} approximation and Bethe–Salpeter equation), and more specific or advanced formalisms, such as dynamical mean-field theory ({DMFT}) and the “temperature-dependent effective potential” approach for anharmonic effects. Relying on planewaves for the representation of wavefunctions, density, and other space-dependent quantities, with pseudopotentials or projector-augmented waves ({PAWs}), it is well suited for the study of periodic materials, although nanostructures and molecules can be treated with the supercell technique. The present article starts with a brief description of the project, a summary of the theories upon which abinit relies, and a list of the associated capabilities. 
It then focuses on selected capabilities that might not be present in the majority of electronic structure packages either among planewave codes or, in general, treatment of strongly correlated materials using {DMFT}; materials under finite electric fields; properties at nuclei (electric field gradient, Mössbauer shifts, and orbital magnetization); positron annihilation; Raman intensities and electro-optic effect; and {DFPT} calculations of response to strain perturbation (elastic constants and piezoelectricity), spatial dispersion (flexoelectricity), electronic mobility, temperature dependence of the gap, and spin-magnetic-field perturbation. The abinit {DFPT} implementation is very general, including systems with van der Waals interaction or with noncollinear magnetism. Community projects are also described: generation of pseudopotential and {PAW} datasets, high-throughput calculations (databases of phonon band structure, second-harmonic generation, and {GW} computations of bandgaps), and the library libpaw. abinit has strong links with many other software projects that are briefly mentioned.}, pages = {124102}, number = {12}, journaltitle = {Journal of Chemical Physics}, @@ -172,7 +165,6 @@ @article{weimer:2008 issn = {0164-0925}, url = {https://doi.org/10.1145/1330017.1330019}, doi = {10.1145/1330017.1330019}, - abstract = {It is difficult to write programs that behave correctly in the presence of run-time errors. Proper behavior in the face of exceptional situations is important to the reliability of long-running programs. Existing programming language features often provide poor support for executing clean-up code and for restoring invariants. We present a data-flow analysis for finding a certain class of exception-handling defects: those related to a failure to release resources or to clean up properly along all paths. Many real-world programs violate such resource usage rules because of incorrect exception handling. 
Our flow-sensitive analysis keeps track of outstanding obligations along program paths and does a precise modeling of control flow in the presence of exceptions. Using it, we have found over 1,300 exception handling defects in over 5 million lines of Java code. Based on those defects we propose a programming language feature, the compensation stack, that keeps track of obligations at run time and ensures that they are discharged. We present a type system for compensation stacks that tracks collections of obligations. Finally, we present case studies to demonstrate that this feature is natural, efficient, and can improve reliability.}, pages = {8:1--8:51}, number = {2}, journaltitle = {ACM Transactions on Programming Languages and Systems}, @@ -192,7 +184,6 @@ @article{maymounkov_koji_2018 title = {Koji: Automating pipelines with mixed-semantics data sources}, url = {http://arxiv.org/abs/1901.01908}, shorttitle = {Koji}, - abstract = {We propose a new result-oriented semantic for de ning data processing work ows that manipulate data in di erent semantic forms ( les or services) in a uni ed manner. is approach enables users to de ne work ows for a vast variety of reproducible data-processing tasks in a simple declarative manner which focuses on applicationlevel results, while automating all control-plane considerations (like failure recovery without loss of progress and computation reuse) behind the scenes.}, journaltitle = {{arXiv}:1901.01908 [cs]}, author = {Maymounkov, Petar}, urldate = {2021-04-20}, @@ -209,7 +200,6 @@ @article{koster_snakemakescalable_2012 issn = {1367-4803}, url = {https://doi.org/10.1093/bioinformatics/bts480}, doi = {10.1093/bioinformatics/bts480}, - abstract = {Summary: Snakemake is a workflow engine that provides a readable Python-based workflow definition language and a powerful execution environment that scales from single-core workstations to compute clusters without modifying the workflow. 
It is the first system to support the use of automatically inferred multiple named wildcards (or variables) in input and output filenames.Availability:http://snakemake.googlecode.com.Contact:johannes.koester@uni-due.de}, pages = {2520--2522}, number = {19}, journaltitle = {Bioinformatics}, @@ -235,7 +225,6 @@ @inproceedings{gamblin_spack_2015 url = {https://www.computer.org/csdl/proceedings-article/sc/2015/2807623/12OmNBf94Xq}, doi = {10.1145/2807591.2807623}, shorttitle = {The Spack package manager}, - abstract = {Large {HPC} centers spend considerable time supporting software for thousands of users, but the complexity of {HPC} software is quickly outpacing the capabilities of existing software management tools. Scientific applications require specific versions of compilers, {MPI}, and other dependency libraries, so using a single, standard software stack is infeasible. However, managing many configurations is difficult because the configuration space is combinatorial in size. We introduce Spack, a tool used at Lawrence Livermore National Laboratory to manage this complexity. Spack provides a novel, recursive specification syntax to invoke parametric builds of packages and dependencies. It allows any number of builds to coexist on the same system, and it ensures that installed packages can find their dependencies, regardless of the environment. We show through real-world use cases that Spack supports diverse and demanding applications, bringing order to {HPC} software chaos.}, eventtitle = {{SC}15: International Conference for High-Performance Computing, Networking, Storage and Analysis}, pages = {1--12}, publisher = {{IEEE} Computer Society}, @@ -247,7 +236,6 @@ @inproceedings{gamblin_spack_2015 @article{dolstra_nix_2004, title = {Nix: A Safe and Policy-Free System for Software Deployment}, - abstract = {Existing systems for software deployment are neither safe nor sufficiently flexible. 
Primary safety issues are the inability to enforce reliable specification of component dependencies, and the lack of support for multiple versions or variants of a component. This renders deployment operations such as upgrading or deleting components dangerous and unpredictable. A deployment system must also be flexible (i.e., policy-free) enough to support both centralised and local package management, and to allow a variety of mechanisms for transferring components. In this paper we present Nix, a deployment system that addresses these issues through a simple technique of using cryptographic hashes to compute unique paths for component instances.}, pages = {14}, author = {Dolstra, Eelco and de Jonge, Merijn and Visser, Eelco}, date = {2004}, @@ -258,9 +246,8 @@ @article{dolstra_nix_2004 } @book{klabnik_rust_2019, - title = {The Rust Programming Language (Covers Rust 2018)}, + title = {The {Rust} Programming Language (Covers {Rust} 2018)}, isbn = {978-1-71850-044-0}, - abstract = {The official book on the Rust programming language, written by the Rust development team at the Mozilla Foundation, fully updated for Rust 2018. The Rust Programming Language is the official book on Rust: an open source systems programming language that helps you write faster, more reliable software. Rust offers control over low-level details (such as memory usage) in combination with high-level ergonomics, eliminating the hassle traditionally associated with low-level languages. The authors of The Rust Programming Language, members of the Rust Core Team, share their knowledge and experience to show you how to take full advantage of Rust's features--from installation to creating robust and scalable programs. 
You'll begin with basics like creating functions, choosing data types, and binding variables and then move on to more advanced concepts, such as: - Ownership and borrowing, lifetimes, and traits - Using Rust's memory safety guarantees to build fast, safe programs - Testing, error handling, and effective refactoring - Generics, smart pointers, multithreading, trait objects, and advanced pattern matching - Using Cargo, Rust's built-in package manager, to build, test, and document your code and manage dependencies - How best to use Rust's advanced compiler with compiler-led programming techniques You'll find plenty of code examples throughout the book, as well as three chapters dedicated to building complete projects to test your learning: a number guessing game, a Rust implementation of a command line tool, and a multithreaded server. New to this edition: An extended section on Rust macros, an expanded chapter on modules, and appendixes on Rust development tools and editions.}, pagetotal = {561}, publisher = {No Starch Press}, author = {Klabnik, Steve and Nichols, Carol}, @@ -273,7 +260,6 @@ @book{klabnik_rust_2019 @book{chacon_pro_2014, title = {Pro Git}, isbn = {978-1-4842-0076-6}, - abstract = {Pro Git (Second Edition) is your fully-updated guide to Git and its usage in the modern world. Git has come a long way since it was first developed by Linus Torvalds for Linux kernel development. It has taken the open source world by storm since its inception in 2005, and this book teaches you how to use it like a pro. Effective and well-implemented version control is a necessity for successful web projects, whether large or small. With this book you’ll learn how to master the world of distributed version workflow, use the distributed features of Git to the full, and extend Git to meet your every need. 
Written by Git pros Scott Chacon and Ben Straub, Pro Git (Second Edition) builds on the hugely successful first edition, and is now fully updated for Git version 2.0, as well as including an indispensable chapter on {GitHub}. It’s the best book for all your Git needs.}, pagetotal = {441}, publisher = {Apress}, author = {Chacon, Scott and Straub, Ben}, @@ -290,7 +276,6 @@ @inproceedings{yoo_slurm_2003 doi = {10.1007/10968987_3}, series = {Lecture Notes in Computer Science}, shorttitle = {{SLURM}}, - abstract = {A new cluster resource management system called Simple Linux Utility Resource Management ({SLURM}) is described in this paper. {SLURM}, initially developed for large Linux clusters at the Lawrence Livermore National Laboratory ({LLNL}), is a simple cluster manager that can scale to thousands of processors. {SLURM} is designed to be flexible and fault-tolerant and can be ported to other clusters of different size and architecture with minimal effort. We are certain that {SLURM} will benefit both users and system architects by providing them with a simple, robust, and highly scalable parallel job execution environment for their cluster system.}, pages = {44--60}, booktitle = {Job Scheduling Strategies for Parallel Processing}, publisher = {Springer}, @@ -307,7 +292,6 @@ @inproceedings{stevens_first_2017 isbn = {978-3-319-63688-7}, doi = {10.1007/978-3-319-63688-7_19}, series = {Lecture Notes in Computer Science}, - abstract = {{SHA}-1 is a widely used 1995 {NIST} cryptographic hash function standard that was officially deprecated by {NIST} in 2011 due to fundamental security weaknesses demonstrated in various analyses and theoretical attacks.Despite its deprecation, {SHA}-1 remains widely used in 2017 for document and {TLS} certificate signatures, and also in many software such as the {GIT} versioning system for integrity and backup purposes.A key reason behind the reluctance of many industry players to replace {SHA}-1 with a safer alternative is the fact that 
finding an actual collision has seemed to be impractical for the past eleven years due to the high complexity and computational cost of the attack.In this paper, we demonstrate that {SHA}-1 collision attacks have finally become practical by providing the first known instance of a collision. Furthermore, the prefix of the colliding messages was carefully chosen so that they allow an attacker to forge two distinct {PDF} documents with the same {SHA}-1 hash that display different arbitrarily-chosen visual contents.We were able to find this collision by combining many special cryptanalytic techniques in complex ways and improving upon previous work. In total the computational effort spent is equivalent to 263.1263.12{\textasciicircum}\{63.1\} calls to {SHA}-1’s compression function, and took approximately 6 500 {CPU} years and 100 {GPU} years. While the computational power spent on this collision is larger than other public cryptanalytic computations, it is still more than 100 000 times faster than a brute force search.}, pages = {570--596}, booktitle = {Advances in Cryptology – {CRYPTO} 2017}, publisher = {Springer International Publishing}, diff --git a/paper/paper.md b/paper/paper.md index 64887fe..05dfa4a 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -49,7 +49,7 @@ exceptions are not properly caught [@weimer:2008]. `funsies` is a set of python programs and modules to describe, execute and analyze computational workflows, with first-class support for shell scripting. It includes a lightweight, decentralized workflow engine backed by a NoSQL -store. Using `funsies`, external program and python-based computations +store. Using `funsies`, external programs and Python-based computations are easily mixed together. Errors are detected and propagated throughout computations. Automatic, transparent incremental computing (based on a hash tree data structure) provides a convenient environment for iterative @@ -89,7 +89,7 @@ needs. 
Using the `funsies` library, general computational workflows are described in lazily evaluated Python code. Operations in `funsies` are taken to be pure, that is, all operation outputs are entirely and solely determined by their -inputs. Workflows are orchestrated using python by manipulating pointers to +inputs. Workflows are orchestrated using Python by manipulating pointers to yet-to-be-calculated data. Workflow instructions are transparently translated and saved as graph elements in a Redis database. @@ -141,7 +141,7 @@ compile time, a technique similar to MapReduce [@dean_mapreduce_2004]. As of now, we have published one project [@pollice:2021] that used an earlier iteration of `funsies`, and are using it in multiple ongoing inquiries. We -provide several sample workflows on Github, with a focus on computational +provide several sample workflows on GitHub, with a focus on computational chemistry, quantum computing, and high-performance computing infrastructure. We intend to maintain `funsies` and of course welcome [collaborations from