@comment{Removed GitHub web-page scrape artifacts (page chrome and gutter line numbers) that preceded the actual BibTeX content. Anything outside an @entry is ignored by BibTeX, but the junk served no purpose and obscured the file.}
%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/
%% Created for Carlos Maltzahn at 2023-09-03 17:15:09 -0700
%% Saved with string encoding Unicode (UTF-8)
@inproceedings{liu:hpec23,
  author        = {Jianshen Liu and Carlos Maltzahn and Craig Ulmer},
  title         = {{Opportunistic Query Execution on SmartNICs for Analyzing In-Transit Data}},
  booktitle     = {HPEC '23},
  address       = {Virtual},
  month         = {September 25-29},
  year          = {2023},
  keywords      = {papers, smartnics, querying, queryprocessing, streaming, streamprocessing, analysis},
  abstract      = {High-performance computing (HPC) systems researchers have proposed using current, programmable network interface cards (or SmartNICs) to offload data management services that would otherwise consume host processor cycles in a platform. While this work has successfully mapped data pipelines to a collection of SmartNICs, users require a flexible means of inspecting in-transit data to assess the live state of the system. In this paper, we explore SmartNIC-driven opportunistic query execution, i.e., enabling the SmartNIC to make a decision about whether to execute a query operation locally (i.e., ``offload'') or defer execution to the client (i.e., ``push-back''). Characterizations of different parts of the end-to-end query path allow the decision engine to make complexity predictions that would not be feasible by the client alone.},
  date-added    = {2023-08-29 19:45:03 -0700},
  date-modified = {2023-08-29 19:56:34 -0700}}
@inproceedings{ulmer:compsys23,
address = {St. Petersburg, FL, USA},
author = {Craig Ulmer and Jianshen Liu and Carlos Maltzahn and Matthew L. Curry},
booktitle = {2nd Workshop on Composable Systems (COMPSYS 2023, co-located with IPDPS 2023)},
date-added = {2023-03-09 10:29:28 -0800},
date-modified = {2023-03-09 10:30:50 -0800},
keywords = {smartnics, composability, datamanagement},
month = {May 15-19},
note = {Best Paper Award},
title = {{Extending Composable Data Services into SmartNICs}},
year = {2023},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1UtVi91bG1lci1jb21wc3lzMjMucGRmTxEBcgAAAAABcgACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////E3VsbWVyLWNvbXBzeXMyMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////gN9l2AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANVLVYAAAIAOi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6VS1WOnVsbWVyLWNvbXBzeXMyMy5wZGYADgAoABMAdQBsAG0AZQByAC0AYwBvAG0AcABzAHkAcwAyADMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADhVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9VLVYvdWxtZXItY29tcHN5czIzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABXAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAc0=}}
@unpublished{amvrosiadis:nsfvision18,
  author        = {George Amvrosiadis and Ali R. Butt and Vasily Tarasov and Ming Zhao and others},
  title         = {Data Storage Research Vision 2025},
  note          = {Report on NSF Visioning Workshop},
  month         = {May 30 - June 1},
  year          = {2018},
  keywords      = {papers, vision, storage, systems, research},
  date-added    = {2023-01-13 13:20:46 -0800},
  date-modified = {2023-01-13 13:20:46 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2FtdnJvc2lhZGlzLW5zZnZpc2lvbjE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////X+gitAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmAA4AOAAbAGEAbQB2AHIAbwBzAGkAYQBkAGkAcwAtAG4AcwBmAHYAaQBzAGkAbwBuADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==},
  bdsk-url-1 = {https://www.overleaf.com/7988123186fbmpsqghjkgr}}
@inproceedings{jimenez:agu18,
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {AGU Fall Meeting},
date-added = {2023-01-11 22:59:55 -0800},
date-modified = {2023-01-11 23:06:28 -0800},
keywords = {reproducibility},
month = {December 12-14},
title = {Reproducible, Automated and Portable Computational and Data Science Experimentation Pipelines with {Popper}},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWFndTE4LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFqaW1lbmV6LWFndTE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TxDwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaW1lbmV6LWFndTE4LnBkZgAOACQAEQBqAGkAbQBlAG4AZQB6AC0AYQBnAHUAMQA4AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotYWd1MTgucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==}}
@inproceedings{lefevre:snia20,
address = {Virtual},
author = {Jeff LeFevre and Carlos Maltzahn},
booktitle = {SNIA SDC 2020},
date-added = {2023-01-11 22:37:16 -0800},
date-modified = {2023-01-11 22:40:46 -0800},
keywords = {programmable, storage},
month = {September 23},
title = {{SkyhookDM}: Storage and Management of Tabular Data in {Ceph}},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1zbmlhMjAucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EmxlZmV2cmUtc25pYTIwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////f5OqIAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGVmZXZyZS1zbmlhMjAucGRmAAAOACYAEgBsAGUAZgBlAHYAcgBlAC0AcwBuAGkAYQAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtc25pYTIwLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1zbmlhMjAtc2xpZGVzLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xlsZWZldnJlLXNuaWEyMC1zbGlkZXMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TrUwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtc25pYTIwLXNsaWRlcy5wZGYADgA0ABkAbABlAGYAZQB2AHIAZQAtAHMAbgBpAGEAMgAwAC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtc25pYTIwLXNsaWRlcy5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj}}
@inproceedings{chakraborty:sdc21,
  author        = {Jayjeet Chakraborty and Carlos Maltzahn},
  title         = {{SkyhookDM: An Arrow-Native Storage System}},
  booktitle     = {SNIA SDC 2021},
  address       = {Virtual},
  month         = {September 28-29},
  year          = {2021},
  keywords      = {programmable, storage},
  date-added    = {2023-01-11 22:30:29 -0800},
  date-modified = {2023-01-11 22:32:09 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktc25pYTIxLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZjaGFrcmFib3J0eS1zbmlhMjEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TplAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABQwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpDOmNoYWtyYWJvcnR5LXNuaWEyMS5wZGYAAA4ALgAWAGMAaABhAGsAcgBhAGIAbwByAHQAeQAtAHMAbgBpAGEAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1zbmlhMjEucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
  bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktc25pYTIxLXNsaWRlcy5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dY2hha3JhYm9ydHktc25pYTIxLXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k6/EAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1zbmlhMjEtc2xpZGVzLnBkZgAOADwAHQBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBzAG4AaQBhADIAMQAtAHMAbABpAGQAZQBzAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1zbmlhMjEtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{malik:precs22,
  author        = {Tanu Malik and Anjo Vahldiek-Oberwagner and Ivo Jimenez and Carlos Maltzahn},
  title         = {{Expanding the Scope of Artifact Evaluation at HPC Conferences: Experience of SC21}},
  booktitle     = {P-RECS'22},
  year          = {2022},
  doi           = {10.1145/3526062.3536354},
  keywords      = {reproducibility},
  date-added    = {2023-01-11 21:05:52 -0800},
  date-modified = {2023-01-11 21:07:18 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsaWstcHJlY3MyMi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbWFsaWstcHJlY3MyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k1PsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYWxpay1wcmVjczIyLnBkZgAOACQAEQBtAGEAbABpAGsALQBwAHIAZQBjAHMAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWxpay1wcmVjczIyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{zakaria:nixcon22,
  author        = {Farid Zakaria and Tom Scogland and Carlos Maltzahn},
  title         = {Rethinking basic primitives for store based systems},
  note          = {NixCon 2022, Paris, France},
  month         = {October 20-22},
  year          = {2022},
  keywords      = {linking, reproducibility, packaging},
  abstract      = {Nix has introduced the world to store-based systems and ushered a new wave of reproducibility. These new systems however are built atop long established patterns and occasionally leverage them to band-aid over the problems Nix aims to solve.
How much further can we leverage the store abstraction to rethink long valued established patterns in Unix based operating systems? This talk will introduce some of the simple improvements one can uncover starting at the linking phase of object building and process startup.
The authors introduce Shrinkwrap which can greatly improve startup performance and further improve reproducibility for applications ported to Nix by making simple improvement to how libraries are discovered and leveraging the store further. Additional explorations for improvements during the linking phase will be discussed and explored. It's time we rethink everything.
},
  date-added    = {2022-11-07 19:32:09 -0800},
  date-modified = {2022-11-07 19:32:09 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLW5peGNvbjIyLnBkZk8RAXgAAAAAAXgAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xR6YWthcmlhLW5peGNvbjIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////348MogAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADWC1aAAACADsvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlgtWjp6YWthcmlhLW5peGNvbjIyLnBkZgAADgAqABQAegBhAGsAYQByAGkAYQAtAG4AaQB4AGMAbwBuADIAMgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLW5peGNvbjIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHU},
  bdsk-url-1 = {https://drive.google.com/file/d/1uFE5UfvteXxkM4KCOjbSh52yGPa2hZtg/view}}
@unpublished{nsf:repeto22,
  author        = {{National Science Foundation -- Office of Advanced Cyberinfrastructure (OAC)}},
  title         = {Collaborative Research: Disciplinary Improvements: Repeto: Building a Network for Practical Reproducibility in Experimental Computer Science},
  note          = {Available at www.nsf.gov/awardsearch/showAward?AWD\_ID=2226407},
  month         = {October},
  year          = {2022},
  keywords      = {funding},
  abstract      = {The Repeto project will foster community practices to make reproducibility a part of mainstream research and education activities in computer science. The project seeks to understand the cost/benefit equation of reproducibility for the computer science systems community, the factors that make reproducibility feasible or infeasible, as well as isolate factors (be they technical or usage oriented) that make practical reproducibility of experiments difficult. This research coordination network will develop a range of activities from teaching methodology for packaging experiments for cost-effective replication; using reproducibility in teaching; collaboration with reproducibility initiatives sponsored through conferences and institutions; community events emphasizing repeating or replicating experiments such as hackathons, competitions, or rankings; fostering repositories of replicable experiments and monitoring their usage/replication; to reporting on state of art and emergent requirements for the support of practical reproducibility. The outcomes of the proposal will be a collection of computer science experiments replicable on open platforms, an understanding of how much and to what extent they are used in mainstream research and education activities via relevant metrics, as well as a series of reports on current enablers and obstacles towards mainstream use of reproducibility in computer science research.
Replicable experiments will be created using platform programmability tools including the Chameleon environment and associated software such as CHI, Trovi, and Jupyter notebooks. This platform programmability approach allows experimenters to express complex experimental topologies in repeatable and persistent ways. Combining platform programmability with executable notebooks will allow investigators to capture the full experimental process for subsequent replication by other researchers.
This award by the CISE Office of Advanced Cyberinfrastructure is jointly supported by the CISE Computer and Networked Systems Division.
This award reflects NSF's statutory mission and has been deemed worthy of support through evaluation using the Foundation's intellectual merit and broader impacts review criteria.},
  date-added    = {2022-08-16 17:33:00 -0700},
  date-modified = {2022-08-16 18:27:26 -0700}}
@inproceedings{liu:hpec22,
  author        = {Jianshen Liu and Carlos Maltzahn and Matthew L. Curry and Craig Ulmer},
  title         = {{Processing Particle Data Flows with SmartNICs}},
  booktitle     = {2022 IEEE High Performance Extreme Computing Conference (IEEE HPEC 2022)},
  address       = {Virtual Event},
  month         = {September 19-23},
  year          = {2022},
  keywords      = {smartnics, offloading, datamanagement, hpc},
  abstract      = {Many distributed applications implement complex data flows and need a flexible mechanism for routing data between producers and consumers. Recent advances in programmable network interface cards, or SmartNICs, represent an opportunity to offload data-flow tasks into the network fabric, thereby freeing the hosts to perform other work. System architects in this space face multiple questions about the best way to leverage SmartNICs as processing elements in data flows. In this paper, we advocate the use of Apache Arrow as a foundation to implement data flow tasks on SmartNICs. We report on our experience adapting a partitioning algorithm for particle data to Apache Arrow and measure the on-card processing performance for the BlueField-2 SmartNIC. Our experiments confirm that the BlueField-2's (de)compression hardware can have a significant impact on in-transit workflows where data must be unpacked, processed, and repacked.},
  date-added    = {2022-08-16 17:08:46 -0700},
  date-modified = {2022-08-16 18:44:04 -0700},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhwZWMyMi5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8ObGl1LWhwZWMyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////981GukAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaHBlYzIyLnBkZgAADgAeAA4AbABpAHUALQBoAHAAZQBjADIAMgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhwZWMyMi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==}}
@inproceedings{zakaria:sc22,
abstract = {High Performance Computing (HPC) software stacks have become complex, with the dependencies of some applications numbering in the hundreds. Packaging, distributing, and administering software stacks of that scale is a complex undertaking anywhere. HPC systems deal with esoteric compilers, hardware, and a panoply of uncommon combinations. In this paper, we explore the mechanisms available for packaging software to find its own dependencies in the context of a taxonomy of software distribution, and discuss their benefits and pitfalls. We discuss workarounds for some common problems caused by using these composed stacks and introduce Shrinkwrap: A solution to producing binaries that directly load their dependencies from precise locations and in a precise order. Beyond simplifying the use of the binaries, this approach also speeds up loading as much as 7× for a large dynamically-linked MPI application in our evaluation.},
address = {Dallas, TX},
author = {Farid Zakaria and Thomas R. W. Scogland and Todd Gamblin and Carlos Maltzahn},
booktitle = {SC22},
date-added = {2022-08-09 12:51:12 -0700},
date-modified = {2022-08-16 18:42:03 -0700},
keywords = {linking, packaging, softwareengineering, oss, reproducibility, compiler},
month = {November 13-18},
title = {Mapping Out the {HPC} Dependency Chaos},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLXNjMjIucGRmTxEBaAAAAAABaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EHpha2FyaWEtc2MyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////fGAhHAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANYLVoAAAIANy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6WC1aOnpha2FyaWEtc2MyMi5wZGYAAA4AIgAQAHoAYQBrAGEAcgBpAGEALQBzAGMAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA1VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvWC1aL3pha2FyaWEtc2MyMi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFQAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABwA==}}
@unpublished{sloan:ucospo22,
  author        = {{Alfred P. Sloan Foundation -- Better Software for Science Program}},
  title         = {{To pilot a postdoctoral fellowship on open source software development and support other activities at the University of California Santa Cruz Open Source Program Office}},
  note          = {Available at sloan.org/grant-detail/9723},
  month         = {January},
  year          = {2022},
  keywords      = {funding},
  date-added    = {2022-08-04 06:46:49 -0700},
  date-modified = {2022-08-04 06:50:01 -0700}}
@article{lieggi:rhrq22,
  author        = {Stephanie Lieggi},
  title         = {Building a university {OSPO}: Bolstering academic research through open source},
  journal       = {Red Hat Research Quarterly},
  volume        = {3},
  number        = {4},
  pages         = {5--6},
  month         = {February},
  year          = {2022},
  keywords      = {oss, ospo, academia},
  date-added    = {2022-05-10 16:11:16 -0700},
  date-modified = {2022-05-10 16:11:48 -0700},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJocnEyMi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbGllZ2dpLXJocnEyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////96gPtQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaWVnZ2ktcmhycTIyLnBkZgAOACQAEQBsAGkAZQBnAGcAaQAtAHIAaAByAHEAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saWVnZ2ktcmhycTIyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{chakraborty:arrowblog22,
author = {Jayjeet Chakraborty and Carlos Maltzahn and David Li and Tom Drabas},
date-added = {2022-05-06 12:28:50 -0700},
date-modified = {2022-05-06 12:28:50 -0700},
keywords = {computation, storage, programmable, datamanagement, ceph, arrow},
month = {January 31},
note = {Available at arrow.apache.org/blog/2022/01/31/skyhook-bringing-computation-to-storage-with-apache-arrow/},
title = {{Skyhook}: Bringing Computation to Storage with {Apache Arrow}},
year = {2022},
bdsk-url-1 = {https://arrow.apache.org/blog/2022/01/31/skyhook-bringing-computation-to-storage-with-apache-arrow/}}
@inproceedings{chakraborty:ccgrid22,
abstract = {With the ever-increasing dataset sizes, several file formats such as Parquet, ORC, and Avro have been developed to store data efficiently, save the network, and interconnect bandwidth at the price of additional CPU utilization. However, with the advent of networks supporting 25-100 Gb/s and storage devices delivering 1,000,000 reqs/sec, the CPU has become the bottleneck trying to keep up feeding data in and out of these fast devices. The result is that data access libraries executed on single clients are often CPU-bound and cannot utilize the scale-out benefits of distributed storage systems. One attractive solution to this problem is to offload data-reducing processing and filtering tasks to the storage layer. However, modifying legacy storage systems to support compute offloading is often tedious and requires an extensive understanding of the system internals. Previous approaches re-implemented functionality of data processing frameworks and access libraries for a particular storage system, a duplication of effort that might have to be repeated for different storage systems.
This paper introduces a new design paradigm that allows extending programmable object storage systems to embed existing, widely used data processing frameworks and access libraries into the storage layer with no modifications. In this approach, data processing frameworks and access libraries can evolve independently from storage systems while leveraging distributed storage systems' scale-out and availability properties. We present Skyhook, an example implementation of our design paradigm using Ceph, Apache Arrow, and Parquet. We provide a brief performance evaluation of Skyhook and discuss key results.},
address = {Taormina (Messina), Italy},
author = {Jayjeet Chakraborty and Ivo Jimenez and Sebastiaan Alvarez Rodriguez and Alexandru Uta and Jeff LeFevre and Carlos Maltzahn},
booktitle = {CCGrid22},
date-added = {2022-04-11 19:45:31 -0700},
date-modified = {2022-04-11 19:57:58 -0700},
keywords = {papers, programmable, storage, systems, arrow, nsf1836650, nsf1705021, nsf1764102},
month = {May 16-19},
title = {{Skyhook}: Towards an {Arrow}-Native Storage System},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2NncmlkMjIucGRmTxEBggAAAAABggACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GGNoYWtyYWJvcnR5LWNjZ3JpZDIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////eejbkAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACAD0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2hha3JhYm9ydHktY2NncmlkMjIucGRmAAAOADIAGABjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBjAGMAZwByAGkAZAAyADIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADtVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9DL2NoYWtyYWJvcnR5LWNjZ3JpZDIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHg}}
@article{harrell:tpds22,
abstract = {In this special section we bring you a practice and experience effort in reproducibility for large-scale computational science at SC20. This section includes nine critiques, each by a student team that reproduced results from a paper published at SC19, during the following year's Student Cluster Competition. The paper is also included in this section and has been expanded upon, now including an analysis of the outcomes of the students' reproducibility experiments. Lastly, this special section encapsulates a variety of advances in reproducibility in the SC conference series technical program.},
author = {Stephen Lien Harrell and Scott Michael and Carlos Maltzahn},
date-added = {2022-04-11 19:38:53 -0700},
date-modified = {2022-04-11 19:42:38 -0700},
journal = {IEEE Transactions on Parallel and Distributed Systems},
keywords = {reproducibility, conference, hpc},
month = {September},
number = {9},
pages = {2011--2013},
title = {Advancing Adoption of Reproducibility in {HPC}: A Preface to the Special Section},
volume = {33},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0gvaGFycmVsbC10cGRzMjIucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EmhhcnJlbGwtdHBkczIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////eejVfAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFIAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkg6aGFycmVsbC10cGRzMjIucGRmAAAOACYAEgBoAGEAcgByAGUAbABsAC0AdABwAGQAcwAyADIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9IL2hhcnJlbGwtdHBkczIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC}}
@inproceedings{rodriguez:bigdata21,
abstract = {Distributed data processing ecosystems are widespread and their components are highly specialized, such that efficient interoperability is urgent. Recently, Apache Arrow was chosen by the community to serve as a format mediator, providing efficient in-memory data representation. Arrow enables efficient data movement between data processing and storage engines, significantly improving interoperability and overall performance. In this work, we design a new zero-cost data interoperability layer between Apache Spark and Arrow-based data sources through the Arrow Dataset API. Our novel data interface helps separate the computation (Spark) and data (Arrow) layers. This enables practitioners to seamlessly use Spark to access data from all Arrow Dataset API-enabled data sources and frameworks. To benefit our community, we open-source our work and show that consuming data through Apache Arrow is zero-cost: our novel data interface is either on-par or more performant than native Spark.},
address = {Virtual Event},
author = {Sebastiaan Alvarez Rodriguez and Jayjeet Chakraborty and Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn and Alexandru Uta},
booktitle = {2021 IEEE International Conference on Big Data (IEEE BigData 2021)},
date-added = {2022-04-11 19:33:51 -0700},
date-modified = {2022-04-11 19:59:07 -0700},
keywords = {papers, spark, arrow, performance, nsf1836650},
month = {December 15-18},
title = {Zero-Cost, {Arrow}-Enabled Data Interface for {Apache Spark}},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYmlnZGF0YTIxLnBkZk8RAYIAAAAAAYIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xdyb2RyaWd1ZXotYmlnZGF0YTIxLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3nozqQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADUS1SAAACAD4vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlEtUjpyb2RyaWd1ZXotYmlnZGF0YTIxLnBkZgAOADAAFwByAG8AZAByAGkAZwB1AGUAegAtAGIAaQBnAGQAYQB0AGEAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA8VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUS1SL3JvZHJpZ3Vlei1iaWdkYXRhMjEucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFsAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB4Q==}}
@unpublished{rodriguez:arxiv21,
abstract = {Distributed data processing ecosystems are widespread and their components are highly specialized, such that efficient interoperability is urgent. Recently, Apache Arrow was chosen by the community to serve as a format mediator, providing efficient in-memory data representation. Arrow enables efficient data movement between data processing and storage engines, significantly improving interoperability and overall performance. In this work, we design a new zero-cost data interoperability layer between Apache Spark and Arrow-based data sources through the Arrow Dataset API. Our novel data interface helps separate the computation (Spark) and data (Arrow) layers. This enables practitioners to seamlessly use Spark to access data from all Arrow Dataset API-enabled data sources and frameworks. To benefit our community, we open-source our work and show that consuming data through Apache Arrow is zero-cost: our novel data interface is either on-par or more performant than native Spark.},
author = {Sebastiaan Alvarez Rodriguez and Jayjeet Chakraborty and Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn and Alexandru Uta},
date-added = {2021-07-23 11:42:12 -0700},
date-modified = {2021-07-23 11:55:28 -0700},
keywords = {papers, spark, arrow, performance},
month = {June 24},
note = {arxiv.org/abs/2106.13020 [cs.DC]},
title = {Zero-Cost, Arrow-Enabled Data Interface for Apache Spark},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYXJ4aXYyMS5wZGZPEQF6AAAAAAF6AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Vcm9kcmlndWV6LWFyeGl2MjEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////90gXGEAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA1EtUgAAAgA8LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpRLVI6cm9kcmlndWV6LWFyeGl2MjEucGRmAA4ALAAVAHIAbwBkAHIAaQBnAHUAZQB6AC0AYQByAHgAaQB2ADIAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYXJ4aXYyMS5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHX}}
@unpublished{liu:arxiv21,
abstract = {High-performance computing (HPC) researchers have long envisioned scenarios where application workflows could be improved through the use of programmable processing elements embedded in the network fabric. Recently, vendors have introduced programmable Smart Network Interface Cards (SmartNICs) that enable computations to be offloaded to the edge of the network. There is great interest in both the HPC and high-performance data analytics communities in understanding the roles these devices may play in the data paths of upcoming systems.
This paper focuses on characterizing both the networking and computing aspects of NVIDIA's new BlueField-2 SmartNIC when used in an Ethernet environment. For the networking evaluation we conducted multiple transfer experiments between processors located at the host, the SmartNIC, and a remote host. These tests illuminate how much processing headroom is available on the SmartNIC during transfers. For the computing evaluation we used the stress-ng benchmark to compare the BlueField-2 to other servers and place realistic bounds on the types of offload operations that are appropriate for the hardware.
Our findings from this work indicate that while the BlueField-2 provides a flexible means of processing data at the network's edge, great care must be taken to not overwhelm the hardware. While the host can easily saturate the network link, the SmartNIC's embedded processors may not have enough computing resources to sustain more than half the expected bandwidth when using kernel-space packet processing. From a computational perspective, encryption operations, memory operations under contention, and on-card IPC operations on the SmartNIC perform significantly better than the general-purpose servers used for comparisons in our experiments. Therefore, applications that mainly focus on these operations may be good candidates for offloading to the SmartNIC. },
author = {Jianshen Liu and Carlos Maltzahn and Craig Ulmer and Matthew Leon Curry},
date-added = {2021-07-23 11:37:49 -0700},
date-modified = {2021-07-23 12:02:34 -0700},
keywords = {papers, smartnics, performance},
month = {May 14},
note = {arxiv.org/abs/2105.06619 [cs.NI]},
title = {Performance Characteristics of the BlueField-2 SmartNIC},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAqLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWFyeGl2MjEucGRmTxEBXAAAAAABXAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////D2xpdS1hcnhpdjIxLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////dIFtZAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADQvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGl1LWFyeGl2MjEucGRmAA4AIAAPAGwAaQB1AC0AYQByAHgAaQB2ADIAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWFyeGl2MjEucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFEAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABsQ==},
bdsk-url-1 = {https://www.nextplatform.com/2021/05/24/testing-the-limits-of-the-bluefield-2-smartnic/}}
@unpublished{chakraborty:arxiv21,
abstract = {With the ever-increasing dataset sizes, several file formats like Parquet, ORC, and Avro have been developed to store data efficiently and to save network and interconnect bandwidth at the price of additional CPU utilization. However, with the advent of networks supporting 25-100 Gb/s and storage devices delivering 1,000,000 reqs/sec the CPU has become the bottleneck, trying to keep up feeding data in and out of these fast devices. The result is that data access libraries executed on single clients are often CPU-bound and cannot utilize the scale-out benefits of distributed storage systems. One attractive solution to this problem is to offload data-reducing processing and filtering tasks to the storage layer. However, modifying legacy storage systems to support compute offloading is often tedious and requires extensive understanding of the internals. Previous approaches re-implemented functionality of data processing frameworks and access library for a particular storage system, a duplication of effort that might have to be repeated for different storage systems. In this paper, we introduce a new design paradigm that allows extending programmable object storage systems to embed existing, widely used data processing frameworks and access libraries into the storage layer with minimal modifications. In this approach data processing frameworks and access libraries can evolve independently from storage systems while leveraging the scale-out and availability properties of distributed storage systems. We present one example implementation of our design paradigm using Ceph, Apache Arrow, and Parquet. We provide a brief performance evaluation of our implementation and discuss key results. },
author = {Jayjeet Chakraborty and Ivo Jimenez and Sebastiaan Alvarez Rodriguez and Alexandru Uta and Jeff LeFevre and Carlos Maltzahn},
date-added = {2021-07-23 10:50:21 -0700},
date-modified = {2021-07-23 13:47:37 -0700},
keywords = {papers, programmable, storage, systems, arrow},
month = {May 21},
note = {arxiv.org/abs/2105.09894 [cs.DC]},
title = {Towards an Arrow-native Storage System},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktYXJ4aXYyMS5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2hha3JhYm9ydHktYXJ4aXYyMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9zcGnQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1hcnhpdjIxLnBkZgAOADAAFwBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBhAHIAeABpAHYAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1hcnhpdjIxLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=}}
@article{chu:epjconf20,
abstract = {Access libraries such as ROOT and HDF5 allow users to interact with datasets using high level abstractions, like coordinate systems and associated slicing operations. Unfortunately, the implementations of access libraries are based on outdated assumptions about storage systems interfaces and are generally unable to fully benefit from modern fast storage devices. For example, access libraries often implement buffering and data layout that assume that large, single-threaded sequential access patterns are causing less overall latency than small parallel random access: while this is true for spinning media, it is not true for flash media. The situation is getting worse with rapidly evolving storage devices such as non-volatile memory and ever larger datasets. Our Skyhook Dataset Mapping project explores distributed dataset mapping infrastructures that can integrate and scale out existing access libraries using Ceph's extensible object model, avoiding reimplementation or even modifications of these access libraries as much as possible. These programmable storage extensions coupled with our distributed dataset mapping techniques enable: 1) access library operations to be offloaded to storage system servers, 2) the independent evolution of access libraries and storage systems and 3) fully leveraging of the existing load balancing, elasticity, and failure management of distributed storage systems like Ceph. They also create more opportunities to conduct storage server-local optimizations specific to storage servers. For example, storage servers might include local key/value stores combined with chunk stores that require different optimizations than a local file system. As storage servers evolve to support new storage devices like non-volatile memory, these server-local optimizations can be implemented while minimizing disruptions to applications. 
We will report progress on the means by which distributed dataset mapping can be abstracted over particular access libraries, including access libraries for ROOT data, and how we address some of the challenges revolving around data partitioning and composability of access operations.},
author = {Aaron Chu and Jeff LeFevre and Carlos Maltzahn and Aldrin Montana and Peter Alvaro and Dana Robinson and Quincey Koziol},
date-added = {2020-12-10 16:45:30 -0800},
date-modified = {2022-07-02 17:49:58 -0700},
journal = {EPJ Web Conf.},
keywords = {papers, programmable, declarative, objectstorage, nsf1836650},
month = {November 16},
pages = {04037},
title = {Mapping Datasets to Programmable Storage},
volume = {245},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWVwamNvbmYyMC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RY2h1LWVwamNvbmYyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9v4AnQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtZXBqY29uZjIwLnBkZgAOACQAEQBjAGgAdQAtAGUAcABqAGMAbwBuAGYAMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaHUtZXBqY29uZjIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWNodS1jaGVwMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////bR5eSAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2h1LWNoZXAxOS1zbGlkZXMucGRmAA4ALAAVAGMAaAB1AC0AYwBoAGUAcAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==},
bdsk-url-1 = {https://indico.cern.ch/event/773049/contributions/3474413/}}
@inproceedings{lieggi:rse-hpc20,
author = {Stephanie Lieggi and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn},
booktitle = {RSE-HPC -- Introduction: Research Software Engineers in HPC: Creating Community, Building Careers, Addressing Challenges, co-located with SC20},
date-added = {2020-11-30 12:29:24 -0800},
date-modified = {2020-11-30 12:31:45 -0800},
keywords = {papers, softwareengineering, oss, cross},
month = {November 12},
title = {The CROSS Incubator: A Case Study for funding and training RSEs},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbGllZ2dpLXJzZS1ocGMyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////97rH54AAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaWVnZ2ktcnNlLWhwYzIwLnBkZgAADgAqABQAbABpAGUAZwBnAGkALQByAHMAZQAtAGgAcABjADIAMAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2xpZWdnaS1yc2UtaHBjMjAtc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////b9ZnGAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmAA4AOAAbAGwAaQBlAGcAZwBpAC0AcgBzAGUALQBoAHAAYwAyADAALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==}}
@inproceedings{chakraborty:canopie20,
author = {Jayjeet Chakraborty and Carlos Maltzahn and Ivo Jimenez},
booktitle = {CANOPIE HPC 2020 (at SC20)},
date-added = {2020-11-30 07:28:21 -0800},
date-modified = {2022-04-11 19:55:33 -0700},
keywords = {papers, reproducibility, containers, workflow, orchestration, nsf1836650},
month = {November 12},
title = {Enabling seamless execution of computational and data science workflows on HPC and cloud with the Popper container-native automation engine},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZTIwLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xljaGFrcmFib3J0eS1jYW5vcGllMjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2+pOygAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABQwAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpDOmNoYWtyYWJvcnR5LWNhbm9waWUyMC5wZGYADgA0ABkAYwBoAGEAawByAGEAYgBvAHIAdAB5AC0AYwBhAG4AbwBwAGkAZQAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9DL2NoYWtyYWJvcnR5LWNhbm9waWUyMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA8Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmTxEBpAAAAAABpAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////H2NoYWtyYWJvcnR5LWNhbm9waSNGRkZGRkZGRi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////b6k8SAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACAEYvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmAA4ARAAhAGMAaABhAGsAcgBhAGIAbwByAHQAeQAtAGMAYQBuAG8AcABpAGUALQAyADAALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIARFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAGMAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAACCw==}}
@article{lefevre:login20,
author = {Jeff LeFevre and Carlos Maltzahn},
date-added = {2020-06-12 18:36:51 -0700},
date-modified = {2020-07-01 12:34:36 -0700},
journal = {USENIX ;login:},
keywords = {papers, programmable, storage, ceph, physicaldesign, cross, nsf1836650, nsf1764102, nsf1705021},
number = {2},
title = {SkyhookDM: Data Processing in Ceph with Programmable Storage},
volume = {45},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1sb2dpbjIwLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsZWZldnJlLWxvZ2luMjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2wl6sgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtbG9naW4yMC5wZGYADgAoABMAbABlAGYAZQB2AHIAZQAtAGwAbwBnAGkAbgAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtbG9naW4yMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{liu:hotedge20,
address = {Boston, MA},
author = {Jianshen Liu and Matthew Leon Curry and Carlos Maltzahn and Philip Kufeldt},
booktitle = {HotEdge'20},
date-added = {2020-04-19 12:38:42 -0700},
date-modified = {2020-07-01 12:35:59 -0700},
keywords = {papers, edge, reliability, disaggregation, embedded, failures, cross, nsf1836650, nsf1764102, nsf1705021},
month = {July 14},
title = {Scale-out Edge Storage Systems with Embedded Storage Nodes to Get Better Availability and Cost-Efficiency At the Same Time},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhvdGVkZ2UyMC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbGl1LWhvdGVkZ2UyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9sdgrIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaG90ZWRnZTIwLnBkZgAOACQAEQBsAGkAdQAtAGgAbwB0AGUAZABnAGUAMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saXUtaG90ZWRnZTIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{chu:irishep20poster,
address = {Princeton, NJ},
author = {Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn},
booktitle = {Poster at IRIS-HEP Poster Session},
date-added = {2020-03-09 22:19:08 -0700},
date-modified = {2020-07-01 12:36:40 -0700},
keywords = {poster, programmable, storage, hep, nsf1836650},
month = {February 27},
title = {SkyhookDM: Programmable Storage for Datasets},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWlyaXNoZXAyMHBvc3Rlci5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2h1LWlyaXNoZXAyMHBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9qMb7UAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtaXJpc2hlcDIwcG9zdGVyLnBkZgAOADAAFwBjAGgAdQAtAGkAcgBpAHMAaABlAHAAMgAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaHUtaXJpc2hlcDIwcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=}}
@inproceedings{chakraborty:ecpam20,
author = {Jayjeet Chakraborty and Ivo Jimenez and Carlos Maltzahn and Arshul Mansoori and Quincy Wofford},
booktitle = {Poster at 2020 Exascale Computing Project Annual Meeting, Houston, TX, February 3-7, 2020},
date-added = {2020-02-05 11:34:01 -0800},
date-modified = {2022-04-11 19:54:42 -0700},
keywords = {shortpapers, reproducibility, containers, workflow, automation, cross, nsf1836650},
title = {Popper 2.0: A Container-native Workflow Execution Engine For Testing Complex Applications and Validating Scientific Claims},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktZWNwYW0yMC5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2hha3JhYm9ydHktZWNwYW0yMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9pgUJQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1lY3BhbTIwLnBkZgAOADAAFwBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBlAGMAcABhAG0AMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1lY3BhbTIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=},
bdsk-url-1 = {https://ecpannualmeeting.com/}}
@inproceedings{chu:chep19,
abstract = {Access libraries such as ROOT and HDF5 allow users to interact with datasets using high level abstractions, like coordinate systems and associated slicing operations. Unfortunately, the implementations of access libraries are based on outdated assumptions about storage systems interfaces and are generally unable to fully benefit from modern fast storage devices. For example, access libraries often implement buffering and data layout that assume that large, single-threaded sequential access patterns are causing less overall latency than small parallel random access: while this is true for spinning media, it is not true for flash media. The situation is getting worse with rapidly evolving storage devices such as non-volatile memory and ever larger datasets. Our Skyhook Dataset Mapping project explores distributed dataset mapping infrastructures that can integrate and scale out existing access libraries using Ceph's extensible object model, avoiding reimplementation or even modifications of these access libraries as much as possible. These programmable storage extensions coupled with our distributed dataset mapping techniques enable: 1) access library operations to be offloaded to storage system servers, 2) the independent evolution of access libraries and storage systems and 3) fully leveraging of the existing load balancing, elasticity, and failure management of distributed storage systems like Ceph. They also create more opportunities to conduct storage server-local optimizations specific to storage servers. For example, storage servers might include local key/value stores combined with chunk stores that require different optimizations than a local file system. As storage servers evolve to support new storage devices like non-volatile memory, these server-local optimizations can be implemented while minimizing disruptions to applications. 
We will report progress on the means by which distributed dataset mapping can be abstracted over particular access libraries, including access libraries for ROOT data, and how we address some of the challenges revolving around data partitioning and composability of access operations.},
address = {Adelaide, Australia},
author = {Aaron Chu and Jeff LeFevre and Carlos Maltzahn and Aldrin Montana and Peter Alvaro and Dana Robinson and Quincey Koziol},
booktitle = {24th International Conference on Computing in High Energy \& Nuclear Physics (CHEP 2019)},
date-added = {2020-01-20 16:19:51 -0800},
date-modified = {2020-07-30 14:13:11 -0700},
keywords = {papers, programmable, declarative, objectstorage, nsf1836650},
month = {November 4-8},
note = {arXiv:2007.01789v1 (Submitted for publication)},
title = {SkyhookDM: Mapping Scientific Datasets to Programmable Storage},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8OY2h1LWNoZXAxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9tHl+cAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtY2hlcDE5LnBkZgAADgAeAA4AYwBoAHUALQBjAGgAZQBwADEAOQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWNodS1jaGVwMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////bR5eSAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2h1LWNoZXAxOS1zbGlkZXMucGRmAA4ALAAVAGMAaAB1AC0AYwBoAGUAcAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==},
bdsk-url-1 = {https://indico.cern.ch/event/773049/contributions/3474413/}}
@inproceedings{weil:lsf07,
address = {San Jose, CA},
author = {Sage Weil and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Linux Storage and Filesystem Workshop (LSF07), held in conjunction with the Conference on File and Storage Technology (FAST 07)},
date-added = {2019-12-29 16:46:38 -0800},
date-modified = {2019-12-29 16:46:38 -0800},
keywords = {shortpapers, storage, scalable},
month = {February 12--13},
title = {Scaling Linux Storage to Petabytes},
year = {2007},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1cvd2VpbC1sc2YwNy5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Od2VpbC1sc2YwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ouiPYAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVcAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6Vzp3ZWlsLWxzZjA3LnBkZgAADgAeAA4AdwBlAGkAbAAtAGwAcwBmADAANwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1cvd2VpbC1sc2YwNy5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==}}
@inproceedings{estolano:fast08wip,
address = {San Jose, CA},
author = {Esteban Molina-Estolano and Carlos Maltzahn and Sage Weil and Scott Brandt},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:38:04 -0800},
date-modified = {2019-12-29 16:39:22 -0800},
keywords = {shortpapers, loadbalancing, objectstorage, distributed, storage},
month = {February 26-29},
title = {Dynamic Load Balancing in Ceph},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0UtRi9lc3RvbGFuby1mYXN0MDh3aXAucGRmTxEBgAAAAAABgAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FmVzdG9sYW5vLWZhc3QwOHdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLob5AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANFLUYAAAIAPS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RS1GOmVzdG9sYW5vLWZhc3QwOHdpcC5wZGYAAA4ALgAWAGUAcwB0AG8AbABhAG4AbwAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA7VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRS1GL2VzdG9sYW5vLWZhc3QwOHdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
@inproceedings{pye:fast08wip,
address = {San Jose, CA},
author = {Ian Pye and Scott Brandt and Carlos Maltzahn},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:29:20 -0800},
date-modified = {2019-12-29 16:30:47 -0800},
keywords = {shortpapers, p2p, filesystems, global},
month = {February 26-29},
title = {Ringer: A Global-Scale Lightweight P2P File Service},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcHllLWZhc3QwOHdpcC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RcHllLWZhc3QwOHdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ouhUIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpweWUtZmFzdDA4d2lwLnBkZgAOACQAEQBwAHkAZQAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9weWUtZmFzdDA4d2lwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{bigelow:fast08wip,
address = {San Jose, CA},
author = {David Bigelow and Scott A. Brandt and Carlos Maltzahn and Sage Weil},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:25:47 -0800},
date-modified = {2019-12-29 16:31:55 -0800},
keywords = {shortpapers, raid, objectstorage},
month = {February 26-29},
title = {Adapting RAID Methods for Use in Object Storage Systems},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0IvYmlnZWxvdy1mYXN0MDh3aXAucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWJpZ2Vsb3ctZmFzdDA4d2lwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLoQoAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFCAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkI6YmlnZWxvdy1mYXN0MDh3aXAucGRmAA4ALAAVAGIAaQBnAGUAbABvAHcALQBmAGEAcwB0ADAAOAB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0IvYmlnZWxvdy1mYXN0MDh3aXAucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{maltzahn:fast08wip,
address = {San Jose, CA},
author = {Carlos Maltzahn},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:18:24 -0800},
date-modified = {2020-01-04 20:29:07 -0700},
keywords = {shortpapers, filesystems, metadata, pim},
month = {February 26-29},
title = {How Private are Home Directories?},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tZmFzdDA4d2lwLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZtYWx0emFobi1mYXN0MDh3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2i6CqAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTQAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpNOm1hbHR6YWhuLWZhc3QwOHdpcC5wZGYAAA4ALgAWAG0AYQBsAHQAegBhAGgAbgAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWx0emFobi1mYXN0MDh3aXAucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=}}
@inproceedings{bhagwan:scc09,
address = {Bangalore, India},
author = {Varun Bhagwan and Carlos Maltzahn},
booktitle = {Work-in-Progress Session at 2009 IEEE International Conference on Services Computing (SCC 2009)},
date-added = {2019-12-29 16:11:09 -0800},
date-modified = {2019-12-29 16:11:52 -0800},
keywords = {shortpapers, crowdsourcing, metadata, filesystems},
month = {September 21--25},
title = {JabberWocky: Crowd-Sourcing Metadata for Files},
year = {2009},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0IvdmJoYWd3YW4tc2NjMDkucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EnZiaGFnd2FuLXNjYzA5LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLoEFAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFCAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkI6dmJoYWd3YW4tc2NjMDkucGRmAAAOACYAEgB2AGIAaABhAGcAdwBhAG4ALQBzAGMAYwAwADkALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9CL3ZiaGFnd2FuLXNjYzA5LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC}}
@inproceedings{wacha:fast10poster,
address = {San Jose, CA},
author = {Rosie Wacha and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Poster Session at the Conference on File and Storage Technology (FAST 2010)},
date-added = {2019-12-27 10:40:59 -0800},
date-modified = {2019-12-27 10:43:18 -0800},
keywords = {shortpapers, flash, RAID},
month = {February 24-27},
title = {RAID4S: Adding SSDs to RAID Arrays},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1cvd2FjaGEtZmFzdDEwcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZ3YWNoYS1mYXN0MTBwb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iuQ3AAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABVwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpXOndhY2hhLWZhc3QxMHBvc3Rlci5wZGYAAA4ALgAWAHcAYQBjAGgAYQAtAGYAYQBzAHQAMQAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvVy93YWNoYS1mYXN0MTBwb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-url-1 = {http://users.soe.ucsc.edu/~carlosm/Papers/S11.pdf}}
@inproceedings{ames:fast10poster,
address = {San Jose, CA},
author = {Sasha Ames and Maya B. Gokhale and Carlos Maltzahn},
booktitle = {Poster Session at the Conference on File and Storage Technology (FAST 2010)},
date-added = {2019-12-26 20:23:07 -0800},
date-modified = {2019-12-29 16:32:23 -0800},
keywords = {shortpapers, filesystems, linking, metadata},
month = {February 24-27},
title = {Design and Implementation of a Metadata-Rich File System},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1mYXN0MTBwb3N0ZXIucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWFtZXMtZmFzdDEwcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKsejAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW1lcy1mYXN0MTBwb3N0ZXIucGRmAA4ALAAVAGEAbQBlAHMALQBmAGEAcwB0ADEAMABwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1mYXN0MTBwb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{polte:pdsw10poster,
address = {New Orleans, LA},
author = {Milo Polte and Esteban Molina-Estolano and John Bent and Garth Gibson and Carlos Maltzahn and Maya B. Gokhale and Scott Brandt},
booktitle = {Poster Session at 5th Petascale Data Storage Workshop (PDSW 2010), co-located with Supercomputing 2010},
date-added = {2019-12-26 20:08:27 -0800},
date-modified = {2019-12-29 16:32:38 -0800},
keywords = {shortpapers, parallel, filesystems, cloudcomputing},
month = {November 15},
title = {PLFS and HDFS: Enabling Parallel Filesystem Semantics In The Cloud},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZwb2x0ZS1wZHN3MTBwb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2irETwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUAAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpQOnBvbHRlLXBkc3cxMHBvc3Rlci5wZGYAAA4ALgAWAHAAbwBsAHQAZQAtAHAAZABzAHcAMQAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9wb2x0ZS1wZHN3MTBwb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dcG9sdGUtcGRzdzEwcG9zdGVyLXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqxJIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpwb2x0ZS1wZHN3MTBwb3N0ZXItcG9zdGVyLnBkZgAOADwAHQBwAG8AbAB0AGUALQBwAGQAcwB3ADEAMABwAG8AcwB0AGUAcgAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9wb2x0ZS1wZHN3MTBwb3N0ZXItcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=},
bdsk-file-3 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGZPEQGKAAAAAAGKAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8acG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqxLsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIAPy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpwb2x0ZS1wZHN3MTBwb3N0ZXItd2lwLnBkZgAADgA2ABoAcABvAGwAdABlAC0AcABkAHMAdwAxADAAcABvAHMAdABlAHIALQB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB6g==}}
@inproceedings{ames:pdsw10poster,
address = {New Orleans, LA},
author = {Sasha Ames and Maya B. Gokhale and Carlos Maltzahn},
booktitle = {Poster Session at 5th Petascale Data Storage Workshop (PDSW 2010), co-located with Supercomputing 2010},
date-added = {2019-12-26 20:05:01 -0800},
date-modified = {2019-12-29 16:32:49 -0800},
keywords = {shortpapers, linking, filesystems, metadata},
month = {November 15},
title = {QMDS: A File System Metadata Service Supporting a Graph Data Model-Based Query Language},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1wZHN3MTBwb3N0ZXIucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWFtZXMtcGRzdzEwcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKsNwAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW1lcy1wZHN3MTBwb3N0ZXIucGRmAA4ALAAVAGEAbQBlAHMALQBwAGQAcwB3ADEAMABwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1wZHN3MTBwb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{skourtis:fast13wip,
address = {San Jose, CA},
author = {Dimitris Skourtis and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Work-in-Progress and Poster Session at the Conference on File and Storage Technology (FAST 2013)},
date-added = {2019-12-26 19:57:02 -0800},
date-modified = {2019-12-29 16:34:24 -0800},
keywords = {shortpapers, performance, predictable, flash, redundancy},
month = {February 12-15},
title = {High Performance \& Low Latency in Solid-State Drives Through Redundancy},
year = {2013},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2tvdXJ0aXMtZmFzdDEzd2lwLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZza291cnRpcy1mYXN0MTN3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2irBvQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNrb3VydGlzLWZhc3QxM3dpcC5wZGYAAA4ALgAWAHMAawBvAHUAcgB0AGkAcwAtAGYAYQBzAHQAMQAzAHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUy9za291cnRpcy1mYXN0MTN3aXAucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2tvdXJ0aXMtZmFzdDEzd2lwLXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dc2tvdXJ0aXMtZmFzdDEzd2lwLXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqwfcAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6Uzpza291cnRpcy1mYXN0MTN3aXAtcG9zdGVyLnBkZgAOADwAHQBzAGsAbwB1AHIAdABpAHMALQBmAGEAcwB0ADEAMwB3AGkAcAAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUy9za291cnRpcy1mYXN0MTN3aXAtcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{lofstead:cluster14poster,
address = {Madrid, Spain},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn and Quincey Koziol and John Bent and Eric Barton},
booktitle = {Poster Session at IEEE Cluster 2014},
date-added = {2019-12-26 19:23:07 -0800},
date-modified = {2019-12-29 16:34:56 -0800},
keywords = {shortpapers, storage, parallel, hpc, exascale},
month = {September 22-26},
title = {An Innovative Storage Stack Addressing Extreme Scale Platforms and Big Data Applications},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtY2x1c3RlcjE0LXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dbG9mc3RlYWQtY2x1c3RlcjE0LXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k+CEAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1jbHVzdGVyMTQtcG9zdGVyLnBkZgAOADwAHQBsAG8AZgBzAHQAZQBhAGQALQBjAGwAdQBzAHQAZQByADEANAAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sb2ZzdGVhZC1jbHVzdGVyMTQtcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{sevilla:fast14wip,
address = {San Jose, CA},
author = {Michael Sevilla and Scott Brandt and Carlos Maltzahn and Ike Nassi and Sam Fineberg},
booktitle = {Work-in-Progress and Poster Session at the 12th USENIX Conference on File and Storage Technology (FAST 2014)},
date-added = {2019-12-26 19:20:27 -0800},
date-modified = {2019-12-29 16:35:02 -0800},
keywords = {shortpapers, filesystems, metadata, loadbalancing},
month = {February 17-20},
title = {Exploring Resource Migration using the CephFS Metadata cluster},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1mYXN0MTQtcG9zdGVyLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xlzZXZpbGxhLWZhc3QxNC1wb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+T2JgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtZmFzdDE0LXBvc3Rlci5wZGYADgA0ABkAcwBlAHYAaQBsAGwAYQAtAGYAYQBzAHQAMQA0AC0AcABvAHMAdABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtZmFzdDE0LXBvc3Rlci5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj}}
@inproceedings{kufeldt:fast18wip,
address = {Oakland, CA},
author = {Philip Kufeldt and Timothy Feldman and Christine Green and Grant Mackey and Carlos Maltzahn and Shingo Tanaka},
booktitle = {WiP and Poster Sessions at 16th USENIX Conference on File and Storage Technologies (FAST'18)},
date-added = {2019-12-26 19:17:05 -0800},
date-modified = {2019-12-29 16:35:11 -0800},
keywords = {shortpapers, eusocial, embedded, storage},
month = {February 12-15},
title = {Eusocial Storage Devices},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2t1ZmVsZC1mYXN0MTh3aXAtcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Wp7P3AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFLAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOks6a3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmAA4AOAAbAGsAdQBmAGUAbABkAC0AZgBhAHMAdAAxADgAdwBpAHAALQBwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==}}
@inproceedings{jimenez:xldb18,
address = {Stanford, CA},
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {Lightning Talk and Poster Session at the 11th Extremely Large Databases Conference (XLDB)},
date-added = {2019-12-26 19:14:42 -0800},
date-modified = {2019-12-29 16:35:19 -0800},
keywords = {shortpapers, reproducibility},
month = {April 30},
title = {Reproducible Computational and Data-Intensive Experimentation Pipelines with Popper},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXhsZGIxOC1zbGlkZXMucGRmTxEBigAAAAABigACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GWppbWVuZXoteGxkYjE4LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////f5PLgAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAQC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXoteGxkYjE4LXNsaWRlcy5wZGYADgA0ABkAagBpAG0AZQBuAGUAegAtAHgAbABkAGIAMQA4AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD5Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei14bGRiMTgtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABdAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAes=},
bdsk-url-1 = {https://www.youtube.com/watch?v=HXk_nVq8D00&list=PLE1UFlsTj5AHNXntohlhH9nYgXGU2ZqOU&index=32}}
@inproceedings{maltzahn:hotstorage18-breakout,
address = {Boston, MA},
author = {Carlos Maltzahn},
booktitle = {Breakout Session abstract at 10th USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage'18, co-located with USENIX ATC'18)},
date-added = {2019-12-26 19:10:01 -0800},
date-modified = {2020-01-19 16:20:17 -0800},
keywords = {shortpapers, storage, embedded, eusocial, programmable},
month = {July 9-10},
title = {Should Storage Devices Stay Dumb or Become Smart?},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA9Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4taG90c3RvcmFnZTE4LWJyZWFrb3V0LnBkZk8RAaoAAAAAAaoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////x9tYWx0emFobi1ob3RzdG9yYWcjRkZGRkZGRkYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iq2rgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTQAAAgBHLzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpNOm1hbHR6YWhuLWhvdHN0b3JhZ2UxOC1icmVha291dC5wZGYAAA4ARgAiAG0AYQBsAHQAegBhAGgAbgAtAGgAbwB0AHMAdABvAHIAYQBnAGUAMQA4AC0AYgByAGUAYQBrAG8AdQB0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBFVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWx0emFobi1ob3RzdG9yYWdlMTgtYnJlYWtvdXQucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABkAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAhI=},
bdsk-url-1 = {https://docs.google.com/presentation/d/1yvXWpxfNWZ4NIL9GLLWM_e3TAm-8Mu-EfAygo1SRRlg/edit?usp=sharing},
bdsk-url-2 = {https://docs.google.com/document/d/1Vfuoy2H8Mg2PrweO5I2sP04gAZonhUIxE3_W9oMFhwI/edit?usp=sharing}}
@inproceedings{kufeldt:fast19poster,
address = {Boston, MA},
author = {Philip Kufeldt and Jianshen Liu and Carlos Maltzahn},
booktitle = {Poster Session at 17th USENIX Conference on File and Storage Technologies (FAST'19)},
date-added = {2019-12-26 19:07:25 -0800},
date-modified = {2019-12-29 16:35:40 -0800},
keywords = {shortpapers, reproducibility, embedded, storage, eusocial},
month = {February 25-28},
title = {MBWU (MibeeWu): Quantifying benefits of offloading data management to storage devices},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxBTLi4vLi4vLi4vTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9rdWZlbGR0LWZhc3QxOXdpcC5wZGZPEQHMAAAAAAHMAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Va3VmZWxkdC1mYXN0MTl3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9hkMrEAAAAAAAAAAAADAAYAAAogY3UAAAAAAAAAAAAAAAAAEjIwMTkwMTE1IEZBU1QxOVdJUAACAF0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6U3VibWlzc2lvbnM6YXJjaGl2ZToyMDE5OjIwMTkwMTE1IEZBU1QxOVdJUDprdWZlbGR0LWZhc3QxOXdpcC5wZGYAAA4ALAAVAGsAdQBmAGUAbABkAHQALQBmAGEAcwB0ADEAOQB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAW1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9rdWZlbGR0LWZhc3QxOXdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAHoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAACSg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxCbLi4vLi4vLi4vTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9RdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGZPEQLsAAAAAALsAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8fUXVhbnRpZnlpbmcgYmVuZWZpI0ZGRkZGRkZGLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9kHWegAAAAAAAAAAAADAAYAAAogY3UAAAAAAAAAAAAAAAAAEjIwMTkwMTE1IEZBU1QxOVdJUAACAKUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6U3VibWlzc2lvbnM6YXJjaGl2ZToyMDE5OjIwMTkwMTE1IEZBU1QxOVdJUDpRdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGYAAA4AvABdAFEAdQBhAG4AdABpAGYAeQBpAG4AZwAgAGIAZQBuAGUAZgBpAHQAcwAgAG8AZgAgAG8AZgBmAGwAbwBhAGQAaQBuAGcAIABkAGEAdABhACAAbQBhAG4AYQBnAGUAbQBlAG4AdAAgAHQAbwAgAHMAdABvAHIAYQBnAGUAIABkAGUAdgBpAGMAZQBzACAAKABQAG8AcwB0AGUAcgApACAAKABGAEEAUwBUACAAJwAxADkAKQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAo1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9RdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAMIAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAADsg==}}
@inproceedings{lefevre:vault20,
address = {Santa Clara, CA},
author = {Jeff LeFevre and Carlos Maltzahn},
booktitle = {2020 Linux Storage and Filesystems Conference (Vault'20, co-located with FAST'20 and NSDI'20)},
date-added = {2019-12-26 19:04:52 -0800},
date-modified = {2020-07-01 12:40:06 -0700},
keywords = {shortpapers, programmable, storage, physicaldesign, nsf1836650, nsf1764102, nsf1705021},
month = {February 24-25},
title = {Scaling databases and file APIs with programmable Ceph object storage},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDIwLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsZWZldnJlLXZhdWx0MjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2oBm3wAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtdmF1bHQyMC5wZGYADgAoABMAbABlAGYAZQB2AHIAZQAtAHYAYQB1AGwAdAAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtdmF1bHQyMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@article{ellis:jbcs94,
author = {Clarence E. Ellis and Carlos Maltzahn},
date-added = {2019-12-26 18:50:02 -0800},
date-modified = {2019-12-26 18:51:29 -0800},
journal = {Journal of the Brazilian Computer Society, Special Edition on CSCW},
keywords = {papers, cscw},
number = {1},
pages = {15--23},
title = {Collaboration with Spreadsheets},
volume = {1},
year = {1994},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0UtRi9lbGxpcy1qYmNzOTQucGRmTxEBaAAAAAABaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EGVsbGlzLWpiY3M5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKrG7AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANFLUYAAAIANy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RS1GOmVsbGlzLWpiY3M5NC5wZGYAAA4AIgAQAGUAbABsAGkAcwAtAGoAYgBjAHMAOQA0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA1VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRS1GL2VsbGlzLWpiY3M5NC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFQAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABwA==}}
@article{jimenez:tinytocs16,
abstract = {Validating experimental results in the field of computer systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. Determining if an experiment is reproducible entails two separate tasks: re-executing the experiment and validating the results. Existing reproducibility efforts have focused on the former, envisioning techniques and infrastructures that make it easier to re-execute an experiment. By focusing on the latter and analyzing the validation workflow that an experiment re-executioner goes through, we notice that validating results is done on the basis of experiment design and high-level goals, rather than exact quantitative metrics.
Based on this insight, we introduce a declarative format for describing the high-level components of an experiment, as well as a language for specifying generic, testable statements that serve as the basis for validation [1,2]. Our language allows to express and validate statements on top of metrics gathered at runtime. We demonstrate the feasibility of this approach by taking an experiment from an already published article and obtain the corresponding experiment specification. We show that, if we had this specification in the first place, validating the original findings would be an almost entirely automated task. If we contrast this with the current state of our practice, where it takes days or weeks (if successful) to reproduce results, we see how making experiment specifications available as part of a publication or as addendum to experimental results can significantly aid in the validation of computer systems research.
Acknowledgements: Work performed under auspices of US DOE by LLNL contract DE-AC52- 07NA27344 ABS-684863 and by SNL contract DE-AC04-94AL85000.},
author = {Ivo Jimenez and Carlos Maltzahn and Jay Lofstead and Adam Moody and Kathryn Mohror and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
date-added = {2019-12-26 18:43:34 -0800},
date-modified = {2020-01-04 21:15:26 -0700},
journal = {Tiny Transactions on Computer Science (TinyToCS)},
keywords = {papers, reproducibility, evaluation},
title = {I Aver: Providing Declarative Experiment Specifications Facilitates the Evaluation of Computer Systems Research},
volume = {4},
year = {2016},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXRpbnl0b2NzMTYucGRmTxEBgAAAAAABgAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FmppbWVuZXotdGlueXRvY3MxNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKrBKAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAPS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotdGlueXRvY3MxNi5wZGYAAA4ALgAWAGoAaQBtAGUAbgBlAHoALQB0AGkAbgB5AHQAbwBjAHMAMQA2AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA7VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotdGlueXRvY3MxNi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
@inproceedings{maltzahn:vkika91,
abstract = {Die meisten CAD-Umgebungen betonen die Unterst{\"u}tzung einzelner Arbeitspl{\"a}tze und helfen nur sekund{\"a}r bei deren Kooperation. Wir schlagen einen umgekehrten Ansatz vor: Entw{\"u}rfe entstehen im Rahmen von interagierenden Sharing-Prozessen, die den gemeinsamen Zugang aller Beteiligten zu Konzepten, Aufgaben und Ergebnissen strukturieren. Dieser Ansatz und seine Konsequenzen werden am Beispiel des Software Engineering dargestellt. Auf der Basis einer Formalisierung dieser Prozesse steuert der ConceptTalk-Prototyp eine verteilte Softwareumgebung und spezielle Kommunikationswerkzeuge {\"u}ber das Wissensbanksystem ConceptBase. Erfahrungen mit ConceptTalk unterst{\"u}tzen ein neues Paradigma, das ein Informationssystem als Medium f{\"u}r komplexe Kommunikation betrachtet.},
author = {Carlos Maltzahn and Thomas Rose},
booktitle = {Verteilte K{\"u}nstliche Intelligenz und kooperatives Arbeiten},
date-added = {2019-12-26 18:32:03 -0800},
date-modified = {2020-01-04 21:16:07 -0700},
editor = {W. Brauer and D. Hern{\'a}ndez},
keywords = {papers, cscw, softwareengineering},
pages = {195--206},
publisher = {Springer-Verlag Berlin Heidelberg},
title = {ConceptTalk: Kooperationsunterst{\"u}tzung in Softwareumgebungen},
volume = {291},
year = {1991},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tdmtpa2E5MS5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbWFsdHphaG4tdmtpa2E5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqrXUAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYWx0emFobi12a2lrYTkxLnBkZgAADgAqABQAbQBhAGwAdAB6AGEAaABuAC0AdgBrAGkAawBhADkAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tdmtpa2E5MS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{leung:msst07,
abstract = {Achieving performance, reliability, and scalability presents a unique set of challenges for large distributed storage. To identify problem areas, there must be a way for developers to have a comprehensive view of the entire storage system. That is, users must be able to understand both node specific behavior and complex relationships between nodes. We present a distributed file system profiling method that supports such analysis. Our approach is based on combining node-specific metrics into a single cohesive system image. This affords users two views of the storage system: a micro, per-node view, as well as, a macro, multi-node view, allowing both node-specific and complex inter-nodal problems to be debugged. We visualize the storage system by displaying nodes and intuitively animating their metrics and behavior allowing easy analysis of complex problems.},
address = {Santa Clara, CA},
author = {Andrew Leung and Eric Lalonde and Jacob Telleen and James Davis and Carlos Maltzahn},
booktitle = {Proceedings of the 24th IEEE Conference on Mass Storage Systems and Technologies (MSST 2007)},
date-added = {2019-12-26 18:07:11 -0800},
date-modified = {2020-01-04 21:16:58 -0700},
keywords = {papers, performance, debugging, distributed, storage, systems},
month = {September},
title = {Using Comprehensive Analysis for Performance Debugging in Distributed Storage Systems},
year = {2007},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGV1bmctbXNzdDA3LnBkZk8RAWIAAAAAAWIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xBsZXVuZy1tc3N0MDcucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqn9gAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA1LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxldW5nLW1zc3QwNy5wZGYAAA4AIgAQAGwAZQB1AG4AZwAtAG0AcwBzAHQAMAA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgAzVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sZXVuZy1tc3N0MDcucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABSAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbg=}}
@inproceedings{lofstead:pdsw13,
abstract = {The rise of Integrated Application Workflows (IAWs) for processing data prior to storage on persistent media prompts the need to incorporate features that reproduce many of the semantics of persistent storage devices. One such feature is the ability to manage data sets as chunks with natural barriers between different data sets. Towards that end, we need a mechanism to ensure that data moved to an intermediate storage area is both complete and correct before allowing access by other processing components. The Doubly Distributed Transactions (D2T) protocol offers such a mechanism. The initial development [9] suffered from scalability limitations and undue requirements on server processes. The current version has addressed these limitations and has demonstrated scalability with low overhead.},
address = {Denver, CO},
author = {Jay Lofstead and Jai Dayal and Ivo Jimenez and Carlos Maltzahn},
booktitle = {8th Parallel Data Storage Workshop at Supercomputing '13 (PDSW 2013)},
date-added = {2019-12-26 16:21:31 -0800},
date-modified = {2020-01-04 21:17:41 -0700},
keywords = {papers, transactions, datamanagement, hpc},
month = {November 18},
title = {Efficient Transactions for Parallel Data Movement},
year = {2013},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtcGRzdzEzLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsb2ZzdGVhZC1wZHN3MTMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iql+wAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxvZnN0ZWFkLXBkc3cxMy5wZGYADgAoABMAbABvAGYAcwB0AGUAYQBkAC0AcABkAHMAdwAxADMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xvZnN0ZWFkLXBkc3cxMy5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{lofstead:iasds14,
abstract = {The DOE Extreme-Scale Technology Acceleration Fast Forward Storage and IO Stack project is going to have significant impact on storage systems design within and beyond the HPC community. With phase 1 of the project complete, it is an excellent opportunity to evaluate many of the decisions made to feed into the phase 2 effort. With this paper we not only provide a timely summary of important aspects of the design specifications but also capture the underlying reasoning that is not available elsewhere.
The initial effort to define a next generation storage system has made admirable contributions in architecture and design. Formalizing the general idea of data staging into burst buffers for the storage system will help manage the performance variability and offer additional data processing opportunities outside the main compute and storage system. Adding a transactional mechanism to manage faults and data visibility helps enable effective analytics without having to work around the IO stack semantics. While these and other contributions are valuable, similar efforts made elsewhere may offer attractive alternatives or differing semantics that could yield a more feature rich environment with little to no additional overhead. For example, the Doubly Distributed Transactions (D2T) protocol offers an alternative approach for incorporating transactional semantics into the data path. Another project, PreDatA, examined how to get the best throughput for data operators and may offer additional insights into further refinements of the Burst Buffer concept.
This paper examines some of the choices made by the Fast Forward team and compares them with other options and offers observations and suggestions based on these other efforts. This will include some non-core contributions of other projects, such as some of the demonstration metadata and data storage components generated while implementing D2T, to make suggestions that may help the next generation design for how the IO stack works as a whole.},
address = {Minneapolis, MN},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn},
booktitle = {Workshop on Interfaces and Architectures for Scientific Data Storage (IASDS 2014)},
date-added = {2019-12-26 16:17:49 -0800},
date-modified = {2020-01-04 23:08:26 -0700},
keywords = {papers, datamanagement, hpc},
month = {September 9-12},
title = {Consistency and Fault Tolerance Considerations for the Next Iteration of the DOE Fast Forward Storage and IO Project},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtaWFzZHMxNC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbG9mc3RlYWQtaWFzZHMxNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqjgAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1pYXNkczE0LnBkZgAADgAqABQAbABvAGYAcwB0AGUAYQBkAC0AaQBhAHMAZABzADEANAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtaWFzZHMxNC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{lofstead:discs14,
abstract = {Scientific simulations are moving away from using centralized persistent storage for intermediate data between workflow steps towards an all online model. This shift is motivated by the relatively slow IO bandwidth growth compared with compute speed increases. The challenges presented by this shift to Integrated Application Workflows are motivated by the loss of persistent storage semantics for node-to-node communication. One step towards addressing this semantics gap is using transactions to logically delineate a data set from 100,000s of processes to 1000s of servers as an atomic unit.
Our previously demonstrated Doubly Distributed Transactions (D2T) protocol showed a high-performance solution, but had not explored how to detect and recover from faults. Instead, the focus was on demonstrating high-performance typical case performance. The research presented here addresses fault detection and recovery based on the enhanced protocol design. The total overhead for a full transaction with multiple operations at 65,536 processes is on average 0.055 seconds. Fault detection and recovery mechanisms demonstrate similar performance to the success case with only the addition of appropriate timeouts for the system. This paper explores the challenges in designing a recoverable protocol for doubly distributed transactions, particularly for parallel computing environments.},
address = {New Orleans, LA},
author = {Jay Lofstead and Jai Dayal and Ivo Jimenez and Carlos Maltzahn},
booktitle = {The 2014 International Workshop on Data-Intensive Scalable Computing Systems (DISCS-2014) (Workshop co-located with Supercomputing 2014)},
date-added = {2019-12-26 16:14:45 -0800},
date-modified = {2020-01-04 21:18:57 -0700},
keywords = {papers, datamanagement, hpc},
month = {November 16},
title = {Efficient, Failure Resilient Transactions for Parallel and Distributed Computing},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtZGlzY3MxNC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbG9mc3RlYWQtZGlzY3MxNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqjVsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1kaXNjczE0LnBkZgAADgAqABQAbABvAGYAcwB0AGUAYQBkAC0AZABpAHMAYwBzADEANAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtZGlzY3MxNC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{jimenez:woc15,
abstract = {Evaluating experimental results in the field of computer systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. In this position paper, we analyze salient features of container technology that, if leveraged correctly, can help reduce the complexity of reproducing experiments in systems research. We present a use case in the area of distributed storage systems to illustrate the extensions that we envision, mainly in terms of container management infrastructure. We also discuss the benefits and limitations of using containers as a way of reproducing research in other areas of experimental systems research.},
address = {Tempe, AZ},
author = {Ivo Jimenez and Carlos Maltzahn and Adam Moody and Kathryn Mohror and Jay Lofstead and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
booktitle = {First Workshop on Containers (WoC 2015) (Workshop co-located with IEEE International Conference on Cloud Engineering - IC2E 2015)},
date-added = {2019-12-26 16:08:16 -0800},
date-modified = {2020-01-19 16:41:52 -0800},
keywords = {papers, reproducibility, containers},
month = {March 9-13},
title = {The Role of Container Technology in Reproducible Computer Systems Research},
year = {2015},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXdvYzE1LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFqaW1lbmV6LXdvYzE1LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqMtQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaW1lbmV6LXdvYzE1LnBkZgAOACQAEQBqAGkAbQBlAG4AZQB6AC0AdwBvAGMAMQA1AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotd29jMTUucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==}}
@inproceedings{lofstead:sc16,
abstract = {The DOE Extreme-Scale Technology Acceleration Fast Forward Storage and IO Stack project is going to have significant impact on storage systems design within and beyond the HPC community. With phase two of the project starting, it is an excellent opportunity to explore the complete design and how it will address the needs of extreme scale platforms. This paper examines each layer of the proposed stack in some detail along with cross-cutting topics, such as transactions and metadata management.
This paper not only provides a timely summary of important aspects of the design specifications but also captures the underlying reasoning that is not available elsewhere. We encourage the broader community to understand the design, intent, and future directions to foster discussion guiding phase two and the ultimate production storage stack based on this work. An initial performance evaluation of the early prototype implementation is also provided to validate the presented design.
},
address = {Salt Lake City, UT},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn and Quincey Koziol and John Bent and Eric Barton},
booktitle = {29th ACM and IEEE International Conference for High Performance Computing, Networking, Storage and Analysis (SC16)},
date-added = {2019-12-26 15:58:41 -0800},
date-modified = {2020-01-04 21:19:51 -0700},
keywords = {papers, parallel, storage, hpc, exascale},
month = {November 13-18},
title = {DAOS and Friends: A Proposal for an Exascale Storage System},
year = {2016},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtc2MxNi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbG9mc3RlYWQtc2MxNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9mAdiIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1zYzE2LnBkZgAOACQAEQBsAG8AZgBzAHQAZQBhAGQALQBzAGMAMQA2AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sb2ZzdGVhZC1zYzE2LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{jimenez:icpe18,
abstract = {We introduce quiho, a framework for profiling application performance that can be used in automated performance regression tests. quiho profiles an application by applying sensitivity analysis, in particular statistical regression analysis (SRA), using application-independent performance feature vectors that characterize the performance of machines. The result of the SRA, feature importance specifically, is used as a proxy to identify hardware and low-level system software behavior. The relative importance of these features serve as a performance profile of an application (termed inferred resource utilization profile or IRUP), which is used to automatically validate performance behavior across multiple revisions of an application's code base without having to instrument code or obtain performance counters. We demonstrate that quiho can successfully discover performance regressions by showing its effectiveness in profiling application performance for synthetically introduced regressions as well as those found in real-world applications.},
address = {Berlin, Germany},
author = {Ivo Jimenez and Noah Watkins and Michael Sevilla and Jay Lofstead and Carlos Maltzahn},
booktitle = {9th ACM/SPEC International Conference on Performance Engineering (ICPE 2018)},
date-added = {2019-12-26 15:51:19 -0800},
date-modified = {2020-07-01 12:46:23 -0700},
keywords = {papers, reproducibility, performance, testing, cross, sandia, nsf1450488},
month = {April 9-13},
title = {quiho: Automated Performance Regression Testing Using Inferred Resource Utilization Profiles},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWljcGUxOC5wZGZPEQFwAAAAAAFwAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8SamltZW5lei1pY3BlMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9wly2wAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA5LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1pY3BlMTgucGRmAAAOACYAEgBqAGkAbQBlAG4AZQB6AC0AaQBjAHAAZQAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADdVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1pY3BlMTgucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABWAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAco=}}
@inproceedings{jimenez:rescue-hpc18,
abstract = {Advances in agile software delivery methodologies and tools (commonly referred to as DevOps) have not yet materialized in academic scenarios such as university, industry and government laboratories. In this position paper we make the case for Black Swan, a platform for the agile implementation, maintenance and curation of experimentation pipelines by embracing a DevOps approach.},
address = {Dallas, TX},
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {1st Workshop on Reproducible, Customizable and Portable Workflows for HPC (ResCuE-HPC'18, co-located with SC'18)},
date-added = {2019-12-26 15:45:05 -0800},
date-modified = {2020-07-01 12:44:44 -0700},
keywords = {papers, reproducibility, cross},
month = {November 11},
title = {Spotting Black Swans With Ease: The Case for a Practical Reproducibility Platform},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXJlc2N1ZS1ocGMxOC5wZGZPEQGIAAAAAAGIAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8YamltZW5lei1yZXNjdWUtaHBjMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqhuQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA/LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1yZXNjdWUtaHBjMTgucGRmAAAOADIAGABqAGkAbQBlAG4AZQB6AC0AcgBlAHMAYwB1AGUALQBoAHAAYwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD1Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1yZXNjdWUtaHBjMTgucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABcAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAeg=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA8Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXJlc2N1ZS1ocGMxOC1zbGlkZXMucGRmTxEBogAAAAABogACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////H2ppbWVuZXotcmVzY3VlLWhwYzE4LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKoc5AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIARi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotcmVzY3VlLWhwYzE4LXNsaWRlcy5wZGYADgBAAB8AagBpAG0AZQBuAGUAegAtAHIAZQBzAGMAdQBlAC0AaABwAGMAMQA4AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAERVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1yZXNjdWUtaHBjMTgtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABjAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAgk=}}
@inproceedings{liu:iodc19,
abstract = {The storage industry is considering new kinds of storage devices that support data access function offloading, i.e. the ability to perform data access functions on the storage device itself as opposed to performing it on a separate compute system to which the storage device is connected. But what is the benefit of offloading to a storage device that is controlled by an embedded platform, very different from a host platform? To quantify the benefit, we need a measurement methodology that enables apple-to-apple comparisons between different platforms. We propose a Media-based Work Unit (MBWU, pronounced ``MibeeWu''), and an MBWU-based measurement methodology to standardize the platform efficiency evaluation so as to quantify the benefit of offloading. To demonstrate the merit of this methodology, we implemented a prototype to automate quantifying the benefit of offloading the key-value data access function.},
address = {Frankfurt a. M., Germany},
author = {Jianshen Liu and Philip Kufeldt and Carlos Maltzahn},
booktitle = {HPC I/O in the Data Center Workshop (HPC-IODC 2019, co-located with ISC-HPC 2019)},
date-added = {2019-12-26 15:40:05 -0800},
date-modified = {2020-07-01 13:11:21 -0700},
keywords = {papers, reproducibility, performance, embedded, storage, eusocial, cross},
month = {June 20},
title = {MBWU: Benefit Quantification for Data Access Function Offloading},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8ObGl1LWlvZGMxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9lvmDAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaW9kYzE5LnBkZgAADgAeAA4AbABpAHUALQBpAG8AZABjADEAOQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWxpdS1pb2RjMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKoXmAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGl1LWlvZGMxOS1zbGlkZXMucGRmAA4ALAAVAGwAaQB1AC0AaQBvAGQAYwAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{dahlgren:pdsw19,
abstract = {In the post-Moore era, systems and devices with new architectures will arrive at a rapid rate with significant impacts on the software stack. Applications will not be able to fully benefit from new architectures unless they can delegate adapting to new devices in lower layers of the stack. In this paper we introduce physical design management which deals with the problem of identifying and executing transformations on physical designs of stored data, i.e. how data is mapped to storage abstractions like files, objects, or blocks, in order to improve performance. Physical design is traditionally placed with applications, access libraries, and databases, using hardwired assumptions about underlying storage systems. Yet, storage systems increasingly not only contain multiple kinds of storage devices with vastly different performance profiles but also move data among those storage devices, thereby changing the benefit of a particular physical design. We advocate placing physical design management in storage, identify interesting research challenges, provide a brief description of a prototype implementation in Ceph, and discuss the results of initial experiments at scale that are replicable using Cloudlab. These experiments show performance and resource utilization trade-offs associated with choosing different physical designs and choosing to transform between physical designs.},
address = {Denver, CO},
author = {Kathryn Dahlgren and Jeff LeFevre and Ashay Shirwadkar and Ken Iizawa and Aldrin Montana and Peter Alvaro and Carlos Maltzahn},
booktitle = {4th International Parallel Data Systems Workshop (PDSW 2019, co-located with SC'19)},
date-added = {2019-12-26 15:35:44 -0800},
date-modified = {2020-07-01 12:44:17 -0700},
keywords = {papers, programmable, storage, datamanagement, physicaldesign, cross, nsf1836650, nsf1764102, nsf1705021},
month = {November 18},
title = {Towards Physical Design Management in Storage Systems},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0QvZGFobGdyZW4tcGRzdzE5LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNkYWhsZ3Jlbi1wZHN3MTkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqEdAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABRAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpEOmRhaGxncmVuLXBkc3cxOS5wZGYADgAoABMAZABhAGgAbABnAHIAZQBuAC0AcABkAHMAdwAxADkALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9EL2RhaGxncmVuLXBkc3cxOS5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{uta:nsdi20,
abstract = {Performance variability has been acknowledged as a problem for over a decade by cloud practitioners and performance engineers. Yet, our survey of top systems conferences reveals that the research community regularly disregards variability when running experiments in the cloud. Focusing on networks, we assess the impact of variability on cloud-based big-data workloads by gathering traces from mainstream commercial clouds and private research clouds. Our data collection consists of millions of datapoints gathered while transferring over 9 petabytes of data. We characterize the network variability present in our data and show that, even though commercial cloud providers implement mechanisms for quality-of-service enforcement, variability still occurs, and is even exacerbated by such mechanisms and service provider policies. We show how big-data workloads suffer from significant slowdowns and lack predictability and replicability, even when state-of-the-art experimentation techniques are used. We provide guidelines for practitioners to reduce the volatility of big data performance, making experiments more repeatable.},
address = {Santa Clara, CA},
author = {Alexandru Uta and Alexandru Custura and Dmitry Duplyakin and Ivo Jimenez and Jan Rellermeyer and Carlos Maltzahn and Robert Ricci and Alexandru Iosup},
booktitle = {NSDI '20},
date-added = {2019-12-26 15:33:24 -0800},
date-modified = {2020-07-01 12:48:02 -0700},
keywords = {papers, reproducibility, datacenter, performance, cross, nsf1450488, nsf1705021, nsf1764102, nsf1836650},
month = {February 25-27},
title = {Is Big Data Performance Reproducible in Modern Cloud Networks?},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1UtVi91dGEtbnNkaTIwLnBkZk8RAWAAAAAAAWAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////w51dGEtbnNkaTIwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2mgzfwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADVS1WAAACADUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlUtVjp1dGEtbnNkaTIwLnBkZgAADgAeAA4AdQB0AGEALQBuAHMAZABpADIAMAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAM1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1UtVi91dGEtbnNkaTIwLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAUgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAG2}}
@inproceedings{lefevre:vault19,
abstract = {Ceph is an open source distributed storage system that is object-based and massively scalable. Ceph provides developers with the capability to create data interfaces that can take advantage of local CPU and memory on the storage nodes (Ceph Object Storage Devices). These interfaces are powerful for application developers and can be created in C, C++, and Lua.
Skyhook is an open source storage and database project in the Center for Research in Open Source Software at UC Santa Cruz. Skyhook uses these capabilities in Ceph to create specialized read/write interfaces that leverage IO and CPU within the storage layer toward database processing and management. Specifically, we develop methods to apply predicates locally as well as additional metadata and indexing capabilities using Ceph's internal indexing mechanism built on top of RocksDB.
Skyhook's approach helps to enable scale-out of a single node database system by scaling out the storage layer. Our results show the performance benefits for some queries indeed scale well as the storage layer scales out.},
address = {Boston, MA},
author = {Jeff LeFevre and Noah Watkins and Michael Sevilla and Carlos Maltzahn},
booktitle = {2019 Linux Storage and Filesystems (Vault'19, co-located with FAST'19)},
date-added = {2019-08-07 17:58:01 -0700},
date-modified = {2020-07-01 12:49:10 -0700},
keywords = {papers, programmable, storage, database, cross, nsf1705021, nsf1764102, nsf1836650},
month = {February 25-26},
title = {Skyhook: Programmable storage for databases},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGZPEQGKAAAAAAGKAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8abGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9nVvz8AAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAPy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsZWZldnJlLXZhdWx0MTktc2xpZGVzLnBkZgAADgA2ABoAbABlAGYAZQB2AHIAZQAtAHYAYQB1AGwAdAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB6g==}}
@inproceedings{david:precs19,
abstract = {Computer network research experiments can be broadly grouped in three categories: simulated, controlled, and real-world experiments. Simulation frameworks, experiment testbeds and measurement tools, respectively, are commonly used as the platforms for carrying out network experiments. In many cases, given the nature of computer network experiments, properly configuring these platforms is a complex and time-consuming task, which makes replicating and validating research results quite challenging. This complexity can be reduced by leveraging tools that enable experiment reproducibility. In this paper, we show how a recently proposed reproducibility tool called Popper facilitates the reproduction of networking experiments. In particular, we detail the steps taken to reproduce results in two published articles that rely on simulations. The outcome of this exercise is a generic workflow for carrying out network simulation experiments. In addition, we briefly present two additional Popper workflows for running experiments on controlled testbeds, as well as studies that gather real-world metrics (all code is publicly available on Github). We close by providing a list of lessons we learned throughout this process.},
author = {Andrea David and Mariette Souppe and Ivo Jimenez and Katia Obraczka and Sam Mansfield and Kerry Veenstra and Carlos Maltzahn},
booktitle = {P-RECS'19},
date-added = {2019-06-25 11:22:58 -0700},
date-modified = {2020-07-01 12:50:12 -0700},
keywords = {papers, reproducibility, networking, experience, cross, nsf1450488, nsf1836650},
month = {June 24},
title = {Reproducible Computer Network Experiments: A Case Study Using Popper},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0QvZGF2aWQtcHJlY3MxOS5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RZGF2aWQtcHJlY3MxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9lvlSUAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUQAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RDpkYXZpZC1wcmVjczE5LnBkZgAOACQAEQBkAGEAdgBpAGQALQBwAHIAZQBjAHMAMQA5AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRC9kYXZpZC1wcmVjczE5LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{liu:ocpgs19,
author = {Jianshen Liu and Philip Kufeldt and Carlos Maltzahn},
date-added = {2019-05-06 18:39:54 -0700},
date-modified = {2020-07-01 12:51:05 -0700},
keywords = {shortpapers, eusocial, storagemedium, performance, cross},
month = {March 14-15},
note = {Poster at OCP Global Summit 2019},
title = {Quantifying benefits of offloading data management to storage devices},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LW9jcGdzMTktcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZsaXUtb2NwZ3MxOS1wb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2PYw6AAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxpdS1vY3BnczE5LXBvc3Rlci5wZGYAAA4ALgAWAGwAaQB1AC0AbwBjAHAAZwBzADEAOQAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saXUtb2NwZ3MxOS1wb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=}}
@inproceedings{sevilla:hotstorage18,
abstract = {The file system metadata service is the scalability bottleneck for many of today's workloads. Common approaches for attacking this ``metadata scaling wall'' include: caching inodes on clients and servers, caching parent inodes for path traversal, and dynamic caching policies that exploit workload locality. These caches reduce the number of remote procedure calls (RPCs) but the effectiveness is dependent on the overhead of maintaining cache coherence and the administrator's ability to select the best cache size for the given workloads. Recent work reduces the number of metadata RPCs to 1 without using a cache at all, by letting clients ``decouple'' the subtrees from the global namespace so that they can do metadata operations locally. Even with this technique, we show that file system metadata is still a bottleneck because namespaces for today's workloads can be very large. The size is problematic for reads because metadata needs to be transferred and materialized.
The management techniques for file system metadata assume that namespaces have no structure but we observe that this is not the case for all workloads. We propose Tintenfisch, a file system that allows users to succinctly express the structure of the metadata they intend to create. If a user can express the structure of the namespace, Tintenfisch clients and servers can (1) compact metadata, (2) modify large namespaces more quickly, and (3) generate only relevant parts of the namespace. This reduces network traffic, storage footprints, and the number of overall metadata operations needed to complete a job.},
address = {Boston, MA},
annote = {Submitted to HotStorage'18},
author = {Michael A. Sevilla and Reza Nasirigerdeh and Carlos Maltzahn and Jeff LeFevre and Noah Watkins and Peter Alvaro and Margaret Lawson and Jay Lofstead and Jim Pivarski},
booktitle = {HotStorage '18},
date-added = {2018-09-04 00:39:56 -0700},
date-modified = {2020-07-01 12:53:25 -0700},
keywords = {papers, metadata, filesystems, scalable, naming, cross, doeDE-SC0016074, nsf1450488, nsf1705021},
month = {July 9-10},
title = {Tintenfisch: File System Namespace Schemas and Generators},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1ob3RzdG9yYWdlMTgucGRmTxEBggAAAAABggACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GHNldmlsbGEtaG90c3RvcmFnZTE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Xs4gIAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFTAAACAD0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlM6c2V2aWxsYS1ob3RzdG9yYWdlMTgucGRmAAAOADIAGABzAGUAdgBpAGwAbABhAC0AaABvAHQAcwB0AG8AcgBhAGcAZQAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADtVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtaG90c3RvcmFnZTE4LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHg}}
@inproceedings{maricq:osdi18,
abstract = {The performance of compute hardware varies: software run repeatedly on the same server (or a different server with supposedly identical parts) can produce performance results that differ with each execution. This variation has important effects on the reproducibility of systems research and ability to quantitatively compare the performance of different systems. It also has implications for commercial computing, where agreements are often made conditioned on meeting specific performance targets.
Over a period of 10 months, we conducted a large-scale study capturing nearly 900,000 data points from 835 servers. We examine this data from two perspectives: that of a service provider wishing to offer a consistent environment, and that of a systems researcher who must understand how variability impacts experimental results. From this examination, we draw a number of lessons about the types and magnitudes of performance variability and the effects on confidence in experiment results. We also create a statistical model that can be used to understand how representative an individual server is of the general population. The full dataset and our analysis tools are publicly available, and we have built a system to interactively explore the data and make recommendations for experiment parameters based on statistical analysis of historical data.},
address = {Carlsbad, CA},
author = {Aleksander Maricq and Dmitry Duplyakin and Ivo Jimenez and Carlos Maltzahn and Ryan Stutsman and Robert Ricci},
booktitle = {13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)},
date-added = {2018-07-21 02:10:24 +0000},
date-modified = {2020-07-01 12:54:52 -0700},
keywords = {papers, performance, statistics, cloud, reproducibility, systems, nsf1450488, cross},
month = {October 8-10},
title = {Taming Performance Variability},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFyaWNxLW9zZGkxOC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbWFyaWNxLW9zZGkxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9fT1NAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYXJpY3Etb3NkaTE4LnBkZgAOACQAEQBtAGEAcgBpAGMAcQAtAG8AcwBkAGkAMQA4AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYXJpY3Etb3NkaTE4LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{sevilla:ccgrid18,
abstract = {Our analysis of the key-value activity generated by the ParSplice molecular dynamics simulation demonstrates the need for more complex cache management strategies. Baseline measurements show clear key access patterns and hot spots that offer significant opportunity for optimization. We use the data management language and policy engine from the Mantle system to dynamically explore a variety of techniques, ranging from basic algorithms and heuristics to statistical models, calculus, and machine learning. While Mantle was originally designed for distributed file systems, we show how the collection of abstractions effectively decomposes the problem into manageable policies for a different application and storage system. Our exploration of this space results in a dynamically sized cache policy that does not sacrifice any performance while using 32-66% less memory than the default ParSplice configuration.},
address = {Washington, DC},
author = {Michael A. Sevilla and Carlos Maltzahn and Peter Alvaro and Reza Nasirigerdeh and Bradley W. Settlemyer and Danny Perez and David Rich and Galen M. Shipman},
booktitle = {CCGRID '18},
date-added = {2018-07-01 21:56:37 +0000},
date-modified = {2020-07-01 12:57:24 -0700},
keywords = {papers, caching, programmable, storage, hpc, doeDE-SC0016074, cross},
month = {May 1-4},
title = {Programmable Caches with a Data Management Language \& Policy Engine},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1jY2dyaWQxOC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Uc2V2aWxsYS1jY2dyaWQxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ezkIQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UzpzZXZpbGxhLWNjZ3JpZDE4LnBkZgAADgAqABQAcwBlAHYAaQBsAGwAYQAtAGMAYwBnAHIAaQBkADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1jY2dyaWQxOC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{sevilla:precs18,
abstract = {We describe the four publications we have tried to make reproducible and discuss how each paper has changed our workflows, practices, and collaboration policies. The fundamental insight is that paper artifacts must be made reproducible from the start of the project; artifacts are too difficult to make reproducible when the papers are (1) already published and (2) authored by researchers that are not thinking about reproducibility. In this paper, we present the best practices adopted by our research laboratory, which was sculpted by the pitfalls we have identified for the Popper convention. We conclude with a ``call-to-arms'' for the community focused on enhancing reproducibility initiatives for academic conferences, industry environments, and national laboratories. We hope that our experiences will shape a best practices guide for future reproducible papers.},
address = {Tempe, AZ},
author = {Michael A. Sevilla and Carlos Maltzahn},
booktitle = {P-RECS'18},
date-added = {2018-06-12 17:20:57 +0000},
date-modified = {2020-07-01 12:57:49 -0700},
keywords = {papers, reproducibility, experience, cross, nsf1450488},
month = {June 11},
title = {{Popper} Pitfalls: Experiences Following a Reproducibility Convention},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1wcmVjczE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNzZXZpbGxhLXByZWNzMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////10VPrQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtcHJlY3MxOC5wZGYADgAoABMAcwBlAHYAaQBsAGwAYQAtAHAAcgBlAGMAcwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtcHJlY3MxOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@article{kufeldt:login18,
abstract = {As storage devices get faster, data management tasks rob the host of CPU cycles and DDR bandwidth. In this article, we examine a new interface to storage devices that can leverage existing and new CPU and DRAM resources to take over data management tasks like availability, recovery, and migrations. This new interface provides a roadmap for device-to-device interactions and more powerful storage devices capable of providing in-store compute services that can dramatically improve performance. We call such storage devices ``eusocial'' because we are inspired by eusocial insects like ants, termites, and bees, which as individuals are primitive but collectively accomplish amazing things.
},
author = {Philip Kufeldt and Carlos Maltzahn and Tim Feldman and Christine Green and Grant Mackey and Shingo Tanaka},
date-added = {2018-06-06 16:06:14 +0000},
date-modified = {2020-07-01 12:58:56 -0700},
journal = {;login: The USENIX Magazine},
keywords = {papers, storage, devices, networking, flash, offloading, cross},
number = {2},
pages = {16--22},
title = {Eusocial Storage Devices -- Offloading Data Management to Storage Devices that Can Act Collectively},
volume = {43},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkdC1sb2dpbjE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNrdWZlbGR0LWxvZ2luMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////13fyGAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABSwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpLOmt1ZmVsZHQtbG9naW4xOC5wZGYADgAoABMAawB1AGYAZQBsAGQAdAAtAGwAbwBnAGkAbgAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9LL2t1ZmVsZHQtbG9naW4xOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{jimenez:pdsw15,
abstract = {Validating experimental results in the field of storage systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. Determining if an experiment is reproducible entails two separate tasks: re-executing the experiment and validating the results. Existing reproducibility efforts have focused on the former, envisioning techniques and infrastructures that make it easier to re-execute an experiment. In this position paper, we focus on the latter by analyzing the validation workflow that an experiment re-executioner goes through. We notice that validating results is done on the basis of experiment design and high-level goals, rather than exact quantitative metrics. Based on this insight, we introduce a declarative format for specifying the high-level components of an experiment as well as describing generic, testable conditions that serve as the basis for validation. We present a use case in the area of distributed storage systems to illustrate the usefulness of this approach.},
address = {Austin, TX},
author = {Ivo Jimenez and Carlos Maltzahn and Jay Lofstead and Kathryn Mohror and Adam Moody and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
booktitle = {PDSW'15},
date-added = {2018-05-15 06:28:35 +0000},
date-modified = {2020-01-04 23:42:08 -0700},
keywords = {papers, reproducibility, declarative},
month = {November 15},
title = {Tackling the Reproducibility Problem in Storage Systems Research with Declarative Experiment Specifications},
year = {2015},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXBkc3cxNS5wZGZPEQFwAAAAAAFwAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8SamltZW5lei1wZHN3MTUucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9cfy+sAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA5LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1wZHN3MTUucGRmAAAOACYAEgBqAGkAbQBlAG4AZQB6AC0AcABkAHMAdwAxADUALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADdVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1wZHN3MTUucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABWAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAco=}}
@techreport{sevilla:ucsctr18,
address = {Santa Cruz, CA},
annote = {Submitted to HotStorage'18},
author = {Michael A. Sevilla and Reza Nasirigerdeh and Carlos Maltzahn and Jeff LeFevre and Noah Watkins and Peter Alvaro and Margaret Lawson and Jay Lofstead and Jim Pivarski},
date-added = {2018-04-08 04:09:23 +0000},
date-modified = {2018-04-08 04:13:07 +0000},
institution = {UC Santa Cruz},
keywords = {papers, metadata, filesystems, scalable, naming},
month = {April 7},
number = {UCSC-SOE-18-08},
title = {{Tintenfisch}: File System Namespace Schemas and Generators},
type = {Tech. rept.},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS11Y3NjdHIxOC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Uc2V2aWxsYS11Y3NjdHIxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9bu4/kAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UzpzZXZpbGxhLXVjc2N0cjE4LnBkZgAADgAqABQAcwBlAHYAaQBsAGwAYQAtAHUAYwBzAGMAdAByADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS11Y3NjdHIxOC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{jia:hipc17,
abstract = {Accessing external resources (e.g., loading input data, checkpointing snapshots, and out-of-core processing) can have a significant impact on the performance of applications. However, no existing programming systems for high-performance computing directly manage and optimize external accesses. As a result, users must explicitly manage external accesses alongside their computation at the application level, which can result in both correctness and performance issues.
We address this limitation by introducing Iris, a task-based programming model with semantics for external resources. Iris allows applications to describe their access requirements to external resources and the relationship of those accesses to the computation. Iris incorporates external I/O into a deferred execution model, reschedules external I/O to overlap I/O with computation, and reduces external I/O when possible. We evaluate Iris on three microbenchmarks representative of important workloads in HPC and a full combustion simulation, S3D. We demonstrate that the Iris implementation of S3D reduces the external I/O overhead by up to 20x, compared to the Legion and the Fortran implementations.},
address = {Jaipur, India},
author = {Zhihao Jia and Sean Treichler and Galen Shipman and Michael Bauer and Noah Watkins and Carlos Maltzahn and Pat McCormick and Alex Aiken},
booktitle = {HiPC 2017},
date-added = {2018-04-03 18:26:23 +0000},
date-modified = {2020-07-01 12:59:49 -0700},
keywords = {papers, runtime, distributed, programming, storage, cross, doeDE-SC0016074, nsf1450488},
month = {December 18-21},
title = {Integrating External Resources with a Task-Based Programming Model},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaWEtaGlwYzE3LnBkZk8RAWAAAAAAAWAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////w5qaWEtaGlwYzE3LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17ONigAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaWEtaGlwYzE3LnBkZgAADgAeAA4AagBpAGEALQBoAGkAcABjADEANwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAM1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0ktSi9qaWEtaGlwYzE3LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAUgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAG2}}
@inproceedings{sevilla:ipdps18,
abstract = {HPC and data center scale application developers are abandoning POSIX IO because file system metadata synchronization and serialization overheads of providing strong consistency and durability are too costly -- and often unnecessary -- for their applications. Unfortunately, designing file systems with weaker consistency or durability semantics excludes applications that rely on stronger guarantees, forcing developers to re-write their applications or deploy them on a different system. We present a framework and API that lets administrators specify their consistency/durability requirements and dynamically assign them to subtrees in the same namespace, allowing administrators to optimize subtrees over time and space for different workloads. We show similar speedups to related work but more importantly, we show performance improvements when we custom fit subtree semantics to applications such as checkpoint-restart (91.7x speedup), user home directories (0.03 standard deviation from optimal), and users checking for partial results (2\% overhead).},
address = {Vancouver, BC, Canada},
author = {Michael A. Sevilla and Ivo Jimenez and Noah Watkins and Jeff LeFevre and Peter Alvaro and Shel Finkelstein and Patrick Donnelly and Carlos Maltzahn},
booktitle = {IPDPS 2018},
date-added = {2018-03-19 21:24:16 +0000},
date-modified = {2020-07-01 13:03:23 -0700},
keywords = {papers, metadata, datamanagement, programmable, filesystems, storage, systems, cross, nsf1450488, doeDE-SC0016074},
month = {May 21-25},
title = {{Cudele}: An {API} and Framework for Programmable Consistency and Durability in a Global Namespace},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1pcGRwczE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNzZXZpbGxhLWlwZHBzMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17OPNgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtaXBkcHMxOC5wZGYADgAoABMAcwBlAHYAaQBsAGwAYQAtAGkAcABkAHAAcwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtaXBkcHMxOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{ionkov-pdsw17,
abstract = {Scientific workflows contain an increasing number of interacting applications, often with big disparity between the formats of data being produced and consumed by different applications. This mismatch can result in performance degradation as data retrieval causes multiple read operations (often to a remote storage system) in order to convert the data. Although some parallel filesystems and middleware libraries attempt to identify access patterns and optimize data retrieval, they frequently fail if the patterns are complex.
The goal of ASGARD is to replace I/O operations issued to a file by the processes with a single operation that passes enough semantic information to the storage system, so it can combine (and eventually optimize) the data movement. ASGARD allows application developers to define their application's abstract dataset as well as the subsets of the data (fragments) that are created and used by the HPC codes. It uses the semantic information to generate and execute transformation rules that convert the data between the memory layouts of the producer and consumer applications, as well as the layout on nonvolatile storage. The transformation engine implements functionality similar to the scatter/gather support available in some file systems. Since data subsets are defined during the initialization phase, i.e., well in advance from the time they are used to store and retrieve data, the storage system has multiple opportunities to optimize both the data layout and the transformation rules in order to increase the overall I/O performance.
In order to evaluate ASGARD's performance, we added support for ASGARD's transformation rules to Ceph's object store RADOS. We created Ceph data objects that allow custom data striping based on ASGARD's fragment definitions. Our tests with the extended RADOS show up to 5 times performance improvements for writes and 10 times performance improvements for reads over collective MPI I/O.},
address = {Denver, CO},
author = {Latchesar Ionkov and Carlos Maltzahn and Michael Lang},
booktitle = {PDSW-DISCS 2017 at SC17},
date-added = {2017-11-07 16:45:07 +0000},
date-modified = {2020-01-04 21:39:53 -0700},
keywords = {papers, replication, layout, language},
month = {Nov 13},
title = {Optimized Scatter/Gather Data Operations for Parallel Storage},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9pb25rb3YtcGRzdzE3LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFpb25rb3YtcGRzdzE3LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17OCgwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjppb25rb3YtcGRzdzE3LnBkZgAOACQAEQBpAG8AbgBrAG8AdgAtAHAAZABzAHcAMQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2lvbmtvdi1wZHN3MTcucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9pb25rb3YtcGRzdzE3LXNsaWRlcy5wZGZPEQGIAAAAAAGIAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8YaW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9bLVjQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA/LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6aW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAAOADIAGABpAG8AbgBrAG8AdgAtAHAAZABzAHcAMQA3AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD1Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovaW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABcAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAeg=}}
@article{hacker:bams17,
abstract = {Software containers can revolutionize research and education with numerical weather prediction models by easing use and guaranteeing reproducibility.},
author = {Joshua P. Hacker and John Exby and David Gill and Ivo Jimenez and Carlos Maltzahn and Timothy See and Gretchen Mullendore and Kathryn Fossell},
date-added = {2017-08-29 05:50:47 +0000},
date-modified = {2020-01-04 21:40:58 -0700},
journal = {Bulletin of the American Meteorological Society},
keywords = {papers, containers, nwp, learning},
pages = {1129--1138},
title = {A Containerized Mesoscale Model and Analysis Toolkit to Accelerate Classroom Learning, Collaborative Research, and Uncertainty Quantification},
volume = {98},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0gvaGFja2VyLWJhbXMxNy5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RaGFja2VyLWJhbXMxNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9XKT/kAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUgAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SDpoYWNrZXItYmFtczE3LnBkZgAOACQAEQBoAGEAYwBrAGUAcgAtAGIAYQBtAHMAMQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSC9oYWNrZXItYmFtczE3LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{jimenez:cnert17,
abstract = {This paper introduces PopperCI, a continuous integration (CI) service hosted at UC Santa Cruz that allows researchers to automate the end-to-end execution and validation of experiments. PopperCI assumes that experiments follow Popper, a convention for implementing experiments and writing articles following a DevOps approach that has been proposed recently. PopperCI runs experiments on public, private or government-funded cloud infrastructures in a fully automated way. We describe how PopperCI executes experiments and present a use case that illustrates the usefulness of the service.},
address = {Atlanta, GA},
author = {Ivo Jimenez and Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau and Jay Lofstead and Carlos Maltzahn and Kathryn Mohror and Robert Ricci},
booktitle = {Workshop on Computer and Networking Experimental Research Using Testbeds (CNERT'17) in conjunction with IEEE INFOCOM 2017},
date-added = {2017-07-31 03:37:33 +0000},
date-modified = {2020-01-04 21:41:20 -0700},
keywords = {papers, reproducibility, devops},
month = {May 1},
title = {{PopperCI}: Automated Reproducibility Validation},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWNuZXJ0MTcucGRmTxEBcgAAAAABcgACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////E2ppbWVuZXotY25lcnQxNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Vo/T7AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAOi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotY25lcnQxNy5wZGYADgAoABMAagBpAG0AZQBuAGUAegAtAGMAbgBlAHIAdAAxADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADhVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1jbmVydDE3LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABXAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAc0=}}