@comment{Removed GitHub web-page scrape artifacts (page chrome and gutter line numbers) that preceded the actual BibTeX content. Anything outside an @entry is ignored by BibTeX, but the junk served no purpose and obscured the file.}
%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/
%% Created for Carlos Maltzahn at 2023-09-03 17:15:09 -0700
%% Saved with string encoding Unicode (UTF-8)
@inproceedings{liu:hpec23,
  author        = {Jianshen Liu and Carlos Maltzahn and Craig Ulmer},
  title         = {{Opportunistic Query Execution on SmartNICs for Analyzing In-Transit Data}},
  booktitle     = {HPEC '23},
  address       = {Virtual},
  month         = {September 25-29},
  year          = {2023},
  keywords      = {papers, smartnics, querying, queryprocessing, streaming, streamprocessing, analysis},
  abstract      = {High-performance computing (HPC) systems researchers have proposed using current, programmable network interface cards (or SmartNICs) to offload data management services that would otherwise consume host processor cycles in a platform. While this work has successfully mapped data pipelines to a collection of SmartNICs, users require a flexible means of inspecting in-transit data to assess the live state of the system. In this paper, we explore SmartNIC-driven opportunistic query execution, i.e., enabling the SmartNIC to make a decision about whether to execute a query operation locally (i.e., ``offload'') or defer execution to the client (i.e., ``push-back''). Characterizations of different parts of the end-to-end query path allow the decision engine to make complexity predictions that would not be feasible by the client alone.},
  date-added    = {2023-08-29 19:45:03 -0700},
  date-modified = {2023-08-29 19:56:34 -0700}}
@inproceedings{ulmer:compsys23,
address = {St. Petersburg, FL, USA},
author = {Craig Ulmer and Jianshen Liu and Carlos Maltzahn and Matthew L. Curry},
booktitle = {2nd Workshop on Composable Systems (COMPSYS 2023, co-located with IPDPS 2023)},
date-added = {2023-03-09 10:29:28 -0800},
date-modified = {2023-03-09 10:30:50 -0800},
keywords = {smartnics, composability, datamanagement},
month = {May 15-19},
note = {Best Paper Award},
title = {{Extending Composable Data Services into SmartNICs}},
year = {2023},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1UtVi91bG1lci1jb21wc3lzMjMucGRmTxEBcgAAAAABcgACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////E3VsbWVyLWNvbXBzeXMyMy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////gN9l2AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANVLVYAAAIAOi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6VS1WOnVsbWVyLWNvbXBzeXMyMy5wZGYADgAoABMAdQBsAG0AZQByAC0AYwBvAG0AcABzAHkAcwAyADMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADhVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9VLVYvdWxtZXItY29tcHN5czIzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABXAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAc0=}}
@unpublished{amvrosiadis:nsfvision18,
  author        = {George Amvrosiadis and Ali R. Butt and Vasily Tarasov and Ming Zhao and others},
  title         = {Data Storage Research Vision 2025},
  note          = {Report on NSF Visioning Workshop},
  month         = {May 30 - June 1},
  year          = {2018},
  keywords      = {papers, vision, storage, systems, research},
  date-added    = {2023-01-13 13:20:46 -0800},
  date-modified = {2023-01-13 13:20:46 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2FtdnJvc2lhZGlzLW5zZnZpc2lvbjE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////X+gitAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmAA4AOAAbAGEAbQB2AHIAbwBzAGkAYQBkAGkAcwAtAG4AcwBmAHYAaQBzAGkAbwBuADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW12cm9zaWFkaXMtbnNmdmlzaW9uMTgucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==},
  bdsk-url-1 = {https://www.overleaf.com/7988123186fbmpsqghjkgr}}
@inproceedings{jimenez:agu18,
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {AGU Fall Meeting},
date-added = {2023-01-11 22:59:55 -0800},
date-modified = {2023-01-11 23:06:28 -0800},
keywords = {reproducibility},
month = {December 12-14},
title = {Reproducible, Automated and Portable Computational and Data Science Experimentation Pipelines with {Popper}},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWFndTE4LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFqaW1lbmV6LWFndTE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TxDwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaW1lbmV6LWFndTE4LnBkZgAOACQAEQBqAGkAbQBlAG4AZQB6AC0AYQBnAHUAMQA4AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotYWd1MTgucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==}}
@inproceedings{lefevre:snia20,
address = {Virtual},
author = {Jeff LeFevre and Carlos Maltzahn},
booktitle = {SNIA SDC 2020},
date-added = {2023-01-11 22:37:16 -0800},
date-modified = {2023-01-11 22:40:46 -0800},
keywords = {programmable, storage},
month = {September 23},
title = {{SkyhookDM}: Storage and Management of Tabular Data in {Ceph}},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1zbmlhMjAucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EmxlZmV2cmUtc25pYTIwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////f5OqIAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGVmZXZyZS1zbmlhMjAucGRmAAAOACYAEgBsAGUAZgBlAHYAcgBlAC0AcwBuAGkAYQAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtc25pYTIwLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1zbmlhMjAtc2xpZGVzLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xlsZWZldnJlLXNuaWEyMC1zbGlkZXMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TrUwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtc25pYTIwLXNsaWRlcy5wZGYADgA0ABkAbABlAGYAZQB2AHIAZQAtAHMAbgBpAGEAMgAwAC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtc25pYTIwLXNsaWRlcy5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj}}
@inproceedings{chakraborty:sdc21,
  author        = {Jayjeet Chakraborty and Carlos Maltzahn},
  title         = {{SkyhookDM: An Arrow-Native Storage System}},
  booktitle     = {SNIA SDC 2021},
  address       = {Virtual},
  month         = {September 28-29},
  year          = {2021},
  keywords      = {programmable, storage},
  date-added    = {2023-01-11 22:30:29 -0800},
  date-modified = {2023-01-11 22:32:09 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktc25pYTIxLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZjaGFrcmFib3J0eS1zbmlhMjEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+TplAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABQwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpDOmNoYWtyYWJvcnR5LXNuaWEyMS5wZGYAAA4ALgAWAGMAaABhAGsAcgBhAGIAbwByAHQAeQAtAHMAbgBpAGEAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1zbmlhMjEucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
  bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktc25pYTIxLXNsaWRlcy5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dY2hha3JhYm9ydHktc25pYTIxLXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k6/EAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1zbmlhMjEtc2xpZGVzLnBkZgAOADwAHQBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBzAG4AaQBhADIAMQAtAHMAbABpAGQAZQBzAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1zbmlhMjEtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{malik:precs22,
  author        = {Tanu Malik and Anjo Vahldiek-Oberwagner and Ivo Jimenez and Carlos Maltzahn},
  title         = {{Expanding the Scope of Artifact Evaluation at HPC Conferences: Experience of SC21}},
  booktitle     = {P-RECS'22},
  year          = {2022},
  doi           = {10.1145/3526062.3536354},
  keywords      = {reproducibility},
  date-added    = {2023-01-11 21:05:52 -0800},
  date-modified = {2023-01-11 21:07:18 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsaWstcHJlY3MyMi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbWFsaWstcHJlY3MyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k1PsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYWxpay1wcmVjczIyLnBkZgAOACQAEQBtAGEAbABpAGsALQBwAHIAZQBjAHMAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWxpay1wcmVjczIyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{zakaria:nixcon22,
  author        = {Farid Zakaria and Tom Scogland and Carlos Maltzahn},
  title         = {Rethinking basic primitives for store based systems},
  note          = {NixCon 2022, Paris, France},
  month         = {October 20-22},
  year          = {2022},
  keywords      = {linking, reproducibility, packaging},
  abstract      = {Nix has introduced the world to store-based systems and ushered a new wave of reproducibility. These new systems however are built atop long established patterns and occasionally leverage them to band-aid over the problems Nix aims to solve.
How much further can we leverage the store abstraction to rethink long valued established patterns in Unix based operating systems? This talk will introduce some of the simple improvements one can uncover starting at the linking phase of object building and process startup.
The authors introduce Shrinkwrap which can greatly improve startup performance and further improve reproducibility for applications ported to Nix by making simple improvement to how libraries are discovered and leveraging the store further. Additional explorations for improvements during the linking phase will be discussed and explored. It's time we rethink everything.
},
  date-added    = {2022-11-07 19:32:09 -0800},
  date-modified = {2022-11-07 19:32:09 -0800},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLW5peGNvbjIyLnBkZk8RAXgAAAAAAXgAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xR6YWthcmlhLW5peGNvbjIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////348MogAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADWC1aAAACADsvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlgtWjp6YWthcmlhLW5peGNvbjIyLnBkZgAADgAqABQAegBhAGsAYQByAGkAYQAtAG4AaQB4AGMAbwBuADIAMgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLW5peGNvbjIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHU},
  bdsk-url-1 = {https://drive.google.com/file/d/1uFE5UfvteXxkM4KCOjbSh52yGPa2hZtg/view}}
@unpublished{nsf:repeto22,
  author        = {{National Science Foundation -- Office of Advanced Cyberinfrastructure (OAC)}},
  title         = {Collaborative Research: Disciplinary Improvements: Repeto: Building a Network for Practical Reproducibility in Experimental Computer Science},
  note          = {Available at www.nsf.gov/awardsearch/showAward?AWD\_ID=2226407},
  month         = {October},
  year          = {2022},
  keywords      = {funding},
  abstract      = {The Repeto project will foster community practices to make reproducibility a part of mainstream research and education activities in computer science. The project seeks to understand the cost/benefit equation of reproducibility for the computer science systems community, the factors that make reproducibility feasible or infeasible, as well as isolate factors (be they technical or usage oriented) that make practical reproducibility of experiments difficult. This research coordination network will develop a range of activities from teaching methodology for packaging experiments for cost-effective replication; using reproducibility in teaching; collaboration with reproducibility initiatives sponsored through conferences and institutions; community events emphasizing repeating or replicating experiments such as hackathons, competitions, or rankings; fostering repositories of replicable experiments and monitoring their usage/replication; to reporting on state of art and emergent requirements for the support of practical reproducibility. The outcomes of the proposal will be a collection of computer science experiments replicable on open platforms, an understanding of how much and to what extent they are used in mainstream research and education activities via relevant metrics, as well as a series of reports on current enablers and obstacles towards mainstream use of reproducibility in computer science research.
Replicable experiments will be created using platform programmability tools including the Chameleon environment and associated software such as CHI, Trovi, and Jupyter notebooks. This platform programmability approach allows experimenters to express complex experimental topologies in repeatable and persistent ways. Combining platform programmability with executable notebooks will allow investigators to capture the full experimental process for subsequent replication by other researchers.
This award by the CISE Office of Advanced Cyberinfrastructure is jointly supported by the CISE Computer and Networked Systems Division.
This award reflects NSF's statutory mission and has been deemed worthy of support through evaluation using the Foundation's intellectual merit and broader impacts review criteria.},
  date-added    = {2022-08-16 17:33:00 -0700},
  date-modified = {2022-08-16 18:27:26 -0700}}
@inproceedings{liu:hpec22,
  author        = {Jianshen Liu and Carlos Maltzahn and Matthew L. Curry and Craig Ulmer},
  title         = {{Processing Particle Data Flows with SmartNICs}},
  booktitle     = {2022 IEEE High Performance Extreme Computing Conference (IEEE HPEC 2022)},
  address       = {Virtual Event},
  month         = {September 19-23},
  year          = {2022},
  keywords      = {smartnics, offloading, datamanagement, hpc},
  abstract      = {Many distributed applications implement complex data flows and need a flexible mechanism for routing data between producers and consumers. Recent advances in programmable network interface cards, or SmartNICs, represent an opportunity to offload data-flow tasks into the network fabric, thereby freeing the hosts to perform other work. System architects in this space face multiple questions about the best way to leverage SmartNICs as processing elements in data flows. In this paper, we advocate the use of Apache Arrow as a foundation to implement data flow tasks on SmartNICs. We report on our experience adapting a partitioning algorithm for particle data to Apache Arrow and measure the on-card processing performance for the BlueField-2 SmartNIC. Our experiments confirm that the BlueField-2's (de)compression hardware can have a significant impact on in-transit workflows where data must be unpacked, processed, and repacked.},
  date-added    = {2022-08-16 17:08:46 -0700},
  date-modified = {2022-08-16 18:44:04 -0700},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhwZWMyMi5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8ObGl1LWhwZWMyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////981GukAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaHBlYzIyLnBkZgAADgAeAA4AbABpAHUALQBoAHAAZQBjADIAMgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhwZWMyMi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==}}
@inproceedings{zakaria:sc22,
abstract = {High Performance Computing (HPC) software stacks have become complex, with the dependencies of some applications numbering in the hundreds. Packaging, distributing, and administering software stacks of that scale is a complex undertaking anywhere. HPC systems deal with esoteric compilers, hardware, and a panoply of uncommon combinations. In this paper, we explore the mechanisms available for packaging software to find its own dependencies in the context of a taxonomy of software distribution, and discuss their benefits and pitfalls. We discuss workarounds for some common problems caused by using these composed stacks and introduce Shrinkwrap: A solution to producing binaries that directly load their dependencies from precise locations and in a precise order. Beyond simplifying the use of the binaries, this approach also speeds up loading as much as 7× for a large dynamically-linked MPI application in our evaluation.},
address = {Dallas, TX},
author = {Farid Zakaria and Thomas R. W. Scogland and Todd Gamblin and Carlos Maltzahn},
booktitle = {SC22},
date-added = {2022-08-09 12:51:12 -0700},
date-modified = {2022-08-16 18:42:03 -0700},
keywords = {linking, packaging, softwareengineering, oss, reproducibility, compiler},
month = {November 13-18},
title = {Mapping Out the {HPC} Dependency Chaos},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1gtWi96YWthcmlhLXNjMjIucGRmTxEBaAAAAAABaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EHpha2FyaWEtc2MyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////fGAhHAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANYLVoAAAIANy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6WC1aOnpha2FyaWEtc2MyMi5wZGYAAA4AIgAQAHoAYQBrAGEAcgBpAGEALQBzAGMAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA1VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvWC1aL3pha2FyaWEtc2MyMi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFQAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABwA==}}
@unpublished{sloan:ucospo22,
  author        = {{Alfred P. Sloan Foundation -- Better Software for Science Program}},
  title         = {{To pilot a postdoctoral fellowship on open source software development and support other activities at the University of California Santa Cruz Open Source Program Office}},
  note          = {Available at sloan.org/grant-detail/9723},
  month         = {January},
  year          = {2022},
  keywords      = {funding},
  date-added    = {2022-08-04 06:46:49 -0700},
  date-modified = {2022-08-04 06:50:01 -0700}}
@article{lieggi:rhrq22,
  author        = {Stephanie Lieggi},
  title         = {Building a university {OSPO}: Bolstering academic research through open source},
  journal       = {Red Hat Research Quarterly},
  volume        = {3},
  number        = {4},
  pages         = {5--6},
  month         = {February},
  year          = {2022},
  keywords      = {oss, ospo, academia},
  date-added    = {2022-05-10 16:11:16 -0700},
  date-modified = {2022-05-10 16:11:48 -0700},
  bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJocnEyMi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbGllZ2dpLXJocnEyMi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////96gPtQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaWVnZ2ktcmhycTIyLnBkZgAOACQAEQBsAGkAZQBnAGcAaQAtAHIAaAByAHEAMgAyAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saWVnZ2ktcmhycTIyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{chakraborty:arrowblog22,
author = {Jayjeet Chakraborty and Carlos Maltzahn and David Li and Tom Drabas},
date-added = {2022-05-06 12:28:50 -0700},
date-modified = {2022-05-06 12:28:50 -0700},
keywords = {computation, storage, programmable, datamanagement, ceph, arrow},
month = {January 31},
note = {Available at arrow.apache.org/blog/2022/01/31/skyhook-bringing-computation-to-storage-with-apache-arrow/},
title = {{Skyhook}: Bringing Computation to Storage with {Apache Arrow}},
year = {2022},
bdsk-url-1 = {https://arrow.apache.org/blog/2022/01/31/skyhook-bringing-computation-to-storage-with-apache-arrow/}}
@inproceedings{chakraborty:ccgrid22,
abstract = {With the ever-increasing dataset sizes, several file formats such as Parquet, ORC, and Avro have been developed to store data efficiently, save the network, and interconnect bandwidth at the price of additional CPU utilization. However, with the advent of networks supporting 25-100 Gb/s and storage devices delivering 1,000,000 reqs/sec, the CPU has become the bottleneck trying to keep up feeding data in and out of these fast devices. The result is that data access libraries executed on single clients are often CPU-bound and cannot utilize the scale-out benefits of distributed storage systems. One attractive solution to this problem is to offload data-reducing processing and filtering tasks to the storage layer. However, modifying legacy storage systems to support compute offloading is often tedious and requires an extensive understanding of the system internals. Previous approaches re-implemented functionality of data processing frameworks and access libraries for a particular storage system, a duplication of effort that might have to be repeated for different storage systems.
This paper introduces a new design paradigm that allows extending programmable object storage systems to embed existing, widely used data processing frameworks and access libraries into the storage layer with no modifications. In this approach, data processing frameworks and access libraries can evolve independently from storage systems while leveraging distributed storage systems' scale-out and availability properties. We present Skyhook, an example implementation of our design paradigm using Ceph, Apache Arrow, and Parquet. We provide a brief performance evaluation of Skyhook and discuss key results.},
address = {Taormina (Messina), Italy},
author = {Jayjeet Chakraborty and Ivo Jimenez and Sebastiaan Alvarez Rodriguez and Alexandru Uta and Jeff LeFevre and Carlos Maltzahn},
booktitle = {CCGrid22},
date-added = {2022-04-11 19:45:31 -0700},
date-modified = {2022-04-11 19:57:58 -0700},
keywords = {papers, programmable, storage, systems, arrow, nsf1836650, nsf1705021, nsf1764102},
month = {May 16-19},
title = {{Skyhook}: Towards an {Arrow}-Native Storage System},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2NncmlkMjIucGRmTxEBggAAAAABggACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GGNoYWtyYWJvcnR5LWNjZ3JpZDIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////eejbkAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACAD0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2hha3JhYm9ydHktY2NncmlkMjIucGRmAAAOADIAGABjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBjAGMAZwByAGkAZAAyADIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADtVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9DL2NoYWtyYWJvcnR5LWNjZ3JpZDIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHg}}
@article{harrell:tpds22,
abstract = {In this special section we bring you a practice and experience effort in reproducibility for large-scale computational science at SC20. This section includes nine critiques, each by a student team that reproduced results from a paper published at SC19, during the following year's Student Cluster Competition. The paper is also included in this section and has been expanded upon, now including an analysis of the outcomes of the students' reproducibility experiments. Lastly, this special section encapsulates a variety of advances in reproducibility in the SC conference series technical program.},
author = {Stephen Lien Harrell and Scott Michael and Carlos Maltzahn},
date-added = {2022-04-11 19:38:53 -0700},
date-modified = {2022-04-11 19:42:38 -0700},
journal = {IEEE Transactions on Parallel and Distributed Systems},
keywords = {reproducibility, conference, hpc},
month = {September},
number = {9},
pages = {2011--2013},
title = {Advancing Adoption of Reproducibility in {HPC}: A Preface to the Special Section},
volume = {33},
year = {2022},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0gvaGFycmVsbC10cGRzMjIucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EmhhcnJlbGwtdHBkczIyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////eejVfAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFIAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkg6aGFycmVsbC10cGRzMjIucGRmAAAOACYAEgBoAGEAcgByAGUAbABsAC0AdABwAGQAcwAyADIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9IL2hhcnJlbGwtdHBkczIyLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC}}
@inproceedings{rodriguez:bigdata21,
abstract = {Distributed data processing ecosystems are widespread and their components are highly specialized, such that efficient interoperability is urgent. Recently, Apache Arrow was chosen by the community to serve as a format mediator, providing efficient in-memory data representation. Arrow enables efficient data movement between data processing and storage engines, significantly improving interoperability and overall performance. In this work, we design a new zero-cost data interoperability layer between Apache Spark and Arrow-based data sources through the Arrow Dataset API. Our novel data interface helps separate the computation (Spark) and data (Arrow) layers. This enables practitioners to seamlessly use Spark to access data from all Arrow Dataset API-enabled data sources and frameworks. To benefit our community, we open-source our work and show that consuming data through Apache Arrow is zero-cost: our novel data interface is either on-par or more performant than native Spark.},
address = {Virtual Event},
author = {Sebastiaan Alvarez Rodriguez and Jayjeet Chakraborty and Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn and Alexandru Uta},
booktitle = {2021 IEEE International Conference on Big Data (IEEE BigData 2021)},
date-added = {2022-04-11 19:33:51 -0700},
date-modified = {2022-04-11 19:59:07 -0700},
keywords = {papers, spark, arrow, performance, nsf1836650},
month = {December 15-18},
title = {Zero-Cost, {Arrow}-Enabled Data Interface for {Apache Spark}},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYmlnZGF0YTIxLnBkZk8RAYIAAAAAAYIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xdyb2RyaWd1ZXotYmlnZGF0YTIxLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3nozqQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADUS1SAAACAD4vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlEtUjpyb2RyaWd1ZXotYmlnZGF0YTIxLnBkZgAOADAAFwByAG8AZAByAGkAZwB1AGUAegAtAGIAaQBnAGQAYQB0AGEAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA8VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUS1SL3JvZHJpZ3Vlei1iaWdkYXRhMjEucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFsAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB4Q==}}
@unpublished{rodriguez:arxiv21,
abstract = {Distributed data processing ecosystems are widespread and their components are highly specialized, such that efficient interoperability is urgent. Recently, Apache Arrow was chosen by the community to serve as a format mediator, providing efficient in-memory data representation. Arrow enables efficient data movement between data processing and storage engines, significantly improving interoperability and overall performance. In this work, we design a new zero-cost data interoperability layer between Apache Spark and Arrow-based data sources through the Arrow Dataset API. Our novel data interface helps separate the computation (Spark) and data (Arrow) layers. This enables practitioners to seamlessly use Spark to access data from all Arrow Dataset API-enabled data sources and frameworks. To benefit our community, we open-source our work and show that consuming data through Apache Arrow is zero-cost: our novel data interface is either on-par or more performant than native Spark.},
author = {Sebastiaan Alvarez Rodriguez and Jayjeet Chakraborty and Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn and Alexandru Uta},
date-added = {2021-07-23 11:42:12 -0700},
date-modified = {2021-07-23 11:55:28 -0700},
keywords = {papers, spark, arrow, performance},
month = {June 24},
note = {arxiv.org/abs/2106.13020 [cs.DC]},
title = {Zero-Cost, Arrow-Enabled Data Interface for Apache Spark},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYXJ4aXYyMS5wZGZPEQF6AAAAAAF6AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Vcm9kcmlndWV6LWFyeGl2MjEucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////90gXGEAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA1EtUgAAAgA8LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpRLVI6cm9kcmlndWV6LWFyeGl2MjEucGRmAA4ALAAVAHIAbwBkAHIAaQBnAHUAZQB6AC0AYQByAHgAaQB2ADIAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1EtUi9yb2RyaWd1ZXotYXJ4aXYyMS5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHX}}
@unpublished{liu:arxiv21,
abstract = {High-performance computing (HPC) researchers have long envisioned scenarios where application workflows could be improved through the use of programmable processing elements embedded in the network fabric. Recently, vendors have introduced programmable Smart Network Interface Cards (SmartNICs) that enable computations to be offloaded to the edge of the network. There is great interest in both the HPC and high-performance data analytics communities in understanding the roles these devices may play in the data paths of upcoming systems.
This paper focuses on characterizing both the networking and computing aspects of NVIDIA's new BlueField-2 SmartNIC when used in an Ethernet environment. For the networking evaluation we conducted multiple transfer experiments between processors located at the host, the SmartNIC, and a remote host. These tests illuminate how much processing headroom is available on the SmartNIC during transfers. For the computing evaluation we used the stress-ng benchmark to compare the BlueField-2 to other servers and place realistic bounds on the types of offload operations that are appropriate for the hardware.
Our findings from this work indicate that while the BlueField-2 provides a flexible means of processing data at the network's edge, great care must be taken to not overwhelm the hardware. While the host can easily saturate the network link, the SmartNIC's embedded processors may not have enough computing resources to sustain more than half the expected bandwidth when using kernel-space packet processing. From a computational perspective, encryption operations, memory operations under contention, and on-card IPC operations on the SmartNIC perform significantly better than the general-purpose servers used for comparisons in our experiments. Therefore, applications that mainly focus on these operations may be good candidates for offloading to the SmartNIC. },
author = {Jianshen Liu and Carlos Maltzahn and Craig Ulmer and Matthew Leon Curry},
date-added = {2021-07-23 11:37:49 -0700},
date-modified = {2021-07-23 12:02:34 -0700},
keywords = {papers, smartnics, performance},
month = {May 14},
note = {arxiv.org/abs/2105.06619 [cs.NI]},
title = {Performance Characteristics of the BlueField-2 SmartNIC},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAqLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWFyeGl2MjEucGRmTxEBXAAAAAABXAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////D2xpdS1hcnhpdjIxLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////dIFtZAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADQvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGl1LWFyeGl2MjEucGRmAA4AIAAPAGwAaQB1AC0AYQByAHgAaQB2ADIAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWFyeGl2MjEucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFEAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABsQ==},
bdsk-url-1 = {https://www.nextplatform.com/2021/05/24/testing-the-limits-of-the-bluefield-2-smartnic/}}
@unpublished{chakraborty:arxiv21,
abstract = {With the ever-increasing dataset sizes, several file formats like Parquet, ORC, and Avro have been developed to store data efficiently and to save network and interconnect bandwidth at the price of additional CPU utilization. However, with the advent of networks supporting 25-100 Gb/s and storage devices delivering 1,000,000 reqs/sec the CPU has become the bottleneck, trying to keep up feeding data in and out of these fast devices. The result is that data access libraries executed on single clients are often CPU-bound and cannot utilize the scale-out benefits of distributed storage systems. One attractive solution to this problem is to offload data-reducing processing and filtering tasks to the storage layer. However, modifying legacy storage systems to support compute offloading is often tedious and requires extensive understanding of the internals. Previous approaches re-implemented functionality of data processing frameworks and access library for a particular storage system, a duplication of effort that might have to be repeated for different storage systems. In this paper, we introduce a new design paradigm that allows extending programmable object storage systems to embed existing, widely used data processing frameworks and access libraries into the storage layer with minimal modifications. In this approach data processing frameworks and access libraries can evolve independently from storage systems while leveraging the scale-out and availability properties of distributed storage systems. We present one example implementation of our design paradigm using Ceph, Apache Arrow, and Parquet. We provide a brief performance evaluation of our implementation and discuss key results. },
author = {Jayjeet Chakraborty and Ivo Jimenez and Sebastiaan Alvarez Rodriguez and Alexandru Uta and Jeff LeFevre and Carlos Maltzahn},
date-added = {2021-07-23 10:50:21 -0700},
date-modified = {2021-07-23 13:47:37 -0700},
keywords = {papers, programmable, storage, systems, arrow},
month = {May 21},
note = {arxiv.org/abs/2105.09894 [cs.DC]},
title = {Towards an Arrow-native Storage System},
year = {2021},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktYXJ4aXYyMS5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2hha3JhYm9ydHktYXJ4aXYyMS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9zcGnQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1hcnhpdjIxLnBkZgAOADAAFwBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBhAHIAeABpAHYAMgAxAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1hcnhpdjIxLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=}}
@article{chu:epjconf20,
abstract = {Access libraries such as ROOT and HDF5 allow users to interact with datasets using high level abstractions, like coordinate systems and associated slicing operations. Unfortunately, the implementations of access libraries are based on outdated assumptions about storage systems interfaces and are generally unable to fully benefit from modern fast storage devices. For example, access libraries often implement buffering and data layout that assume that large, single-threaded sequential access patterns are causing less overall latency than small parallel random access: while this is true for spinning media, it is not true for flash media. The situation is getting worse with rapidly evolving storage devices such as non-volatile memory and ever larger datasets. Our Skyhook Dataset Mapping project explores distributed dataset mapping infrastructures that can integrate and scale out existing access libraries using Ceph's extensible object model, avoiding reimplementation or even modifications of these access libraries as much as possible. These programmable storage extensions coupled with our distributed dataset mapping techniques enable: 1) access library operations to be offloaded to storage system servers, 2) the independent evolution of access libraries and storage systems and 3) fully leveraging of the existing load balancing, elasticity, and failure management of distributed storage systems like Ceph. They also create more opportunities to conduct storage server-local optimizations specific to storage servers. For example, storage servers might include local key/value stores combined with chunk stores that require different optimizations than a local file system. As storage servers evolve to support new storage devices like non-volatile memory, these server-local optimizations can be implemented while minimizing disruptions to applications. 
We will report progress on the means by which distributed dataset mapping can be abstracted over particular access libraries, including access libraries for ROOT data, and how we address some of the challenges revolving around data partitioning and composability of access operations.},
author = {Aaron Chu and Jeff LeFevre and Carlos Maltzahn and Aldrin Montana and Peter Alvaro and Dana Robinson and Quincey Koziol},
date-added = {2020-12-10 16:45:30 -0800},
date-modified = {2022-07-02 17:49:58 -0700},
journal = {EPJ Web Conf.},
keywords = {papers, programmable, declarative, objectstorage, nsf1836650},
month = {November 16},
pages = {04037},
title = {Mapping Datasets to Programmable Storage},
volume = {245},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWVwamNvbmYyMC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RY2h1LWVwamNvbmYyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9v4AnQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtZXBqY29uZjIwLnBkZgAOACQAEQBjAGgAdQAtAGUAcABqAGMAbwBuAGYAMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaHUtZXBqY29uZjIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWNodS1jaGVwMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////bR5eSAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2h1LWNoZXAxOS1zbGlkZXMucGRmAA4ALAAVAGMAaAB1AC0AYwBoAGUAcAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==},
bdsk-url-1 = {https://indico.cern.ch/event/773049/contributions/3474413/}}
@inproceedings{lieggi:rse-hpc20,
author = {Stephanie Lieggi and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn},
booktitle = {RSE-HPC -- Introduction: Research Software Engineers in HPC: Creating Community, Building Careers, Addressing Challenges, co-located with SC20},
date-added = {2020-11-30 12:29:24 -0800},
date-modified = {2020-11-30 12:31:45 -0800},
keywords = {papers, softwareengineering, oss, cross},
month = {November 12},
title = {The CROSS Incubator: A Case Study for funding and training RSEs},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbGllZ2dpLXJzZS1ocGMyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////97rH54AAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaWVnZ2ktcnNlLWhwYzIwLnBkZgAADgAqABQAbABpAGUAZwBnAGkALQByAHMAZQAtAGgAcABjADIAMAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2xpZWdnaS1yc2UtaHBjMjAtc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////b9ZnGAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmAA4AOAAbAGwAaQBlAGcAZwBpAC0AcgBzAGUALQBoAHAAYwAyADAALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGllZ2dpLXJzZS1ocGMyMC1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==}}
@inproceedings{chakraborty:canopie20,
author = {Jayjeet Chakraborty and Carlos Maltzahn and Ivo Jimenez},
booktitle = {CANOPIE HPC 2020 (at SC20)},
date-added = {2020-11-30 07:28:21 -0800},
date-modified = {2022-04-11 19:55:33 -0700},
keywords = {papers, reproducibility, containers, workflow, orchestration, nsf1836650},
month = {November 12},
title = {Enabling seamless execution of computational and data science workflows on HPC and cloud with the Popper container-native automation engine},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZTIwLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xljaGFrcmFib3J0eS1jYW5vcGllMjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2+pOygAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABQwAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpDOmNoYWtyYWJvcnR5LWNhbm9waWUyMC5wZGYADgA0ABkAYwBoAGEAawByAGEAYgBvAHIAdAB5AC0AYwBhAG4AbwBwAGkAZQAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9DL2NoYWtyYWJvcnR5LWNhbm9waWUyMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA8Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmTxEBpAAAAAABpAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////H2NoYWtyYWJvcnR5LWNhbm9waSNGRkZGRkZGRi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////b6k8SAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACAEYvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmAA4ARAAhAGMAaABhAGsAcgBhAGIAbwByAHQAeQAtAGMAYQBuAG8AcABpAGUALQAyADAALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIARFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktY2Fub3BpZS0yMC1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAGMAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAACCw==}}
@article{lefevre:login20,
author = {Jeff LeFevre and Carlos Maltzahn},
date-added = {2020-06-12 18:36:51 -0700},
date-modified = {2020-07-01 12:34:36 -0700},
journal = {USENIX ;login:},
keywords = {papers, programmable, storage, ceph, physicaldesign, cross, nsf1836650, nsf1764102, nsf1705021},
number = {2},
title = {SkyhookDM: Data Processing in Ceph with Programmable Storage},
volume = {45},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS1sb2dpbjIwLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsZWZldnJlLWxvZ2luMjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2wl6sgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtbG9naW4yMC5wZGYADgAoABMAbABlAGYAZQB2AHIAZQAtAGwAbwBnAGkAbgAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtbG9naW4yMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{liu:hotedge20,
address = {Boston, MA},
author = {Jianshen Liu and Matthew Leon Curry and Carlos Maltzahn and Philip Kufeldt},
booktitle = {HotEdge'20},
date-added = {2020-04-19 12:38:42 -0700},
date-modified = {2020-07-01 12:35:59 -0700},
keywords = {papers, edge, reliability, disaggregation, embedded, failures, cross, nsf1836650, nsf1764102, nsf1705021},
month = {July 14},
title = {Scale-out Edge Storage Systems with Embedded Storage Nodes to Get Better Availability and Cost-Efficiency At the Same Time},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWhvdGVkZ2UyMC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbGl1LWhvdGVkZ2UyMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9sdgrIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaG90ZWRnZTIwLnBkZgAOACQAEQBsAGkAdQAtAGgAbwB0AGUAZABnAGUAMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saXUtaG90ZWRnZTIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{chu:irishep20poster,
address = {Princeton, NJ},
author = {Aaron Chu and Ivo Jimenez and Jeff LeFevre and Carlos Maltzahn},
booktitle = {Poster at IRIS-HEP Poster Session},
date-added = {2020-03-09 22:19:08 -0700},
date-modified = {2020-07-01 12:36:40 -0700},
keywords = {poster, programmable, storage, hep, nsf1836650},
month = {February 27},
title = {SkyhookDM: Programmable Storage for Datasets},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWlyaXNoZXAyMHBvc3Rlci5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2h1LWlyaXNoZXAyMHBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9qMb7UAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtaXJpc2hlcDIwcG9zdGVyLnBkZgAOADAAFwBjAGgAdQAtAGkAcgBpAHMAaABlAHAAMgAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaHUtaXJpc2hlcDIwcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=}}
@inproceedings{chakraborty:ecpam20,
author = {Jayjeet Chakraborty and Ivo Jimenez and Carlos Maltzahn and Arshul Mansoori and Quincy Wofford},
booktitle = {Poster at 2020 Exascale Computing Project Annual Meeting, Houston, TX, February 3-7, 2020},
date-added = {2020-02-05 11:34:01 -0800},
date-modified = {2022-04-11 19:54:42 -0700},
keywords = {shortpapers, reproducibility, containers, workflow, automation, cross, nsf1836650},
title = {Popper 2.0: A Container-native Workflow Execution Engine For Testing Complex Applications and Validating Scientific Claims},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAyLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2hha3JhYm9ydHktZWNwYW0yMC5wZGZPEQF8AAAAAAF8AAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8XY2hha3JhYm9ydHktZWNwYW0yMC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9pgUJQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAPC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaGFrcmFib3J0eS1lY3BhbTIwLnBkZgAOADAAFwBjAGgAYQBrAHIAYQBiAG8AcgB0AHkALQBlAGMAcABhAG0AMgAwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA6VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvQy9jaGFrcmFib3J0eS1lY3BhbTIwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABZAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdk=},
bdsk-url-1 = {https://ecpannualmeeting.com/}}
@inproceedings{chu:chep19,
abstract = {Access libraries such as ROOT and HDF5 allow users to interact with datasets using high level abstractions, like coordinate systems and associated slicing operations. Unfortunately, the implementations of access libraries are based on outdated assumptions about storage systems interfaces and are generally unable to fully benefit from modern fast storage devices. For example, access libraries often implement buffering and data layout that assume that large, single-threaded sequential access patterns are causing less overall latency than small parallel random access: while this is true for spinning media, it is not true for flash media. The situation is getting worse with rapidly evolving storage devices such as non-volatile memory and ever larger datasets. Our Skyhook Dataset Mapping project explores distributed dataset mapping infrastructures that can integrate and scale out existing access libraries using Ceph's extensible object model, avoiding reimplementation or even modifications of these access libraries as much as possible. These programmable storage extensions coupled with our distributed dataset mapping techniques enable: 1) access library operations to be offloaded to storage system servers, 2) the independent evolution of access libraries and storage systems and 3) fully leveraging of the existing load balancing, elasticity, and failure management of distributed storage systems like Ceph. They also create more opportunities to conduct storage server-local optimizations specific to storage servers. For example, storage servers might include local key/value stores combined with chunk stores that require different optimizations than a local file system. As storage servers evolve to support new storage devices like non-volatile memory, these server-local optimizations can be implemented while minimizing disruptions to applications. 
We will report progress on the means by which distributed dataset mapping can be abstracted over particular access libraries, including access libraries for ROOT data, and how we address some of the challenges revolving around data partitioning and composability of access operations.},
address = {Adelaide, Australia},
author = {Aaron Chu and Jeff LeFevre and Carlos Maltzahn and Aldrin Montana and Peter Alvaro and Dana Robinson and Quincey Koziol},
booktitle = {24th International Conference on Computing in High Energy \& Nuclear Physics (CHEP 2019)},
date-added = {2020-01-20 16:19:51 -0800},
date-modified = {2020-07-30 14:13:11 -0700},
keywords = {papers, programmable, declarative, objectstorage, nsf1836650},
month = {November 4-8},
note = {arXiv:2007.01789v1 (Submitted for publication)},
title = {SkyhookDM: Mapping Scientific Datasets to Programmable Storage},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8OY2h1LWNoZXAxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9tHl+cAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUMAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6QzpjaHUtY2hlcDE5LnBkZgAADgAeAA4AYwBoAHUALQBjAGgAZQBwADEAOQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWNodS1jaGVwMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////bR5eSAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFDAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkM6Y2h1LWNoZXAxOS1zbGlkZXMucGRmAA4ALAAVAGMAaAB1AC0AYwBoAGUAcAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0MvY2h1LWNoZXAxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==},
bdsk-url-1 = {https://indico.cern.ch/event/773049/contributions/3474413/}}
@inproceedings{weil:lsf07,
address = {San Jose, CA},
author = {Sage Weil and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Linux Storage and Filesystem Workshop (LSF07), held in conjunction with the Conference on File and Storage Technology (FAST 07)},
date-added = {2019-12-29 16:46:38 -0800},
date-modified = {2019-12-29 16:46:38 -0800},
keywords = {shortpapers, storage, scalable},
month = {February 12--13},
title = {Scaling Linux Storage to Petabytes},
year = {2007},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1cvd2VpbC1sc2YwNy5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Od2VpbC1sc2YwNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ouiPYAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVcAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6Vzp3ZWlsLWxzZjA3LnBkZgAADgAeAA4AdwBlAGkAbAAtAGwAcwBmADAANwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1cvd2VpbC1sc2YwNy5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==}}
@inproceedings{estolano:fast08wip,
address = {San Jose, CA},
author = {Esteban Molina-Estolano and Carlos Maltzahn and Sage Weil and Scott Brandt},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:38:04 -0800},
date-modified = {2019-12-29 16:39:22 -0800},
keywords = {shortpapers, loadbalancing, objectstorage, distributed, storage},
month = {February 26-29},
title = {Dynamic Load Balancing in Ceph},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0UtRi9lc3RvbGFuby1mYXN0MDh3aXAucGRmTxEBgAAAAAABgAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FmVzdG9sYW5vLWZhc3QwOHdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLob5AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANFLUYAAAIAPS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RS1GOmVzdG9sYW5vLWZhc3QwOHdpcC5wZGYAAA4ALgAWAGUAcwB0AG8AbABhAG4AbwAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA7VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRS1GL2VzdG9sYW5vLWZhc3QwOHdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
@inproceedings{pye:fast08wip,
address = {San Jose, CA},
author = {Ian Pye and Scott Brandt and Carlos Maltzahn},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:29:20 -0800},
date-modified = {2019-12-29 16:30:47 -0800},
keywords = {shortpapers, p2p, filesystems, global},
month = {February 26-29},
title = {Ringer: A Global-Scale Lightweight P2P File Service},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcHllLWZhc3QwOHdpcC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RcHllLWZhc3QwOHdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ouhUIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpweWUtZmFzdDA4d2lwLnBkZgAOACQAEQBwAHkAZQAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9weWUtZmFzdDA4d2lwLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{bigelow:fast08wip,
address = {San Jose, CA},
author = {David Bigelow and Scott A. Brandt and Carlos Maltzahn and Sage Weil},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:25:47 -0800},
date-modified = {2019-12-29 16:31:55 -0800},
keywords = {shortpapers, raid, objectstorage},
month = {February 26-29},
title = {Adapting RAID Methods for Use in Object Storage Systems},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0IvYmlnZWxvdy1mYXN0MDh3aXAucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWJpZ2Vsb3ctZmFzdDA4d2lwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLoQoAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFCAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkI6YmlnZWxvdy1mYXN0MDh3aXAucGRmAA4ALAAVAGIAaQBnAGUAbABvAHcALQBmAGEAcwB0ADAAOAB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0IvYmlnZWxvdy1mYXN0MDh3aXAucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{maltzahn:fast08wip,
address = {San Jose, CA},
author = {Carlos Maltzahn},
booktitle = {Work-in-Progress Session of the USENIX Conference on File and Storage Technology (FAST 2008)},
date-added = {2019-12-29 16:18:24 -0800},
date-modified = {2020-01-04 20:29:07 -0700},
keywords = {shortpapers, filesystems, metadata, pim},
month = {February 26-29},
title = {How Private are Home Directories?},
year = {2008},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tZmFzdDA4d2lwLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZtYWx0emFobi1mYXN0MDh3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2i6CqAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTQAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpNOm1hbHR6YWhuLWZhc3QwOHdpcC5wZGYAAA4ALgAWAG0AYQBsAHQAegBhAGgAbgAtAGYAYQBzAHQAMAA4AHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWx0emFobi1mYXN0MDh3aXAucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=}}
@inproceedings{bhagwan:scc09,
address = {Bangalore, India},
author = {Varun Bhagwan and Carlos Maltzahn},
booktitle = {Work-in-Progress Session at 2009 IEEE International Conference on Services Computing (SCC 2009)},
date-added = {2019-12-29 16:11:09 -0800},
date-modified = {2019-12-29 16:11:52 -0800},
keywords = {shortpapers, crowdsourcing, metadata, filesystems},
month = {September 21--25},
title = {JabberWocky: Crowd-Sourcing Metadata for Files},
year = {2009},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0IvdmJoYWd3YW4tc2NjMDkucGRmTxEBagAAAAABagACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EnZiaGFnd2FuLXNjYzA5LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aLoEFAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFCAAACADcvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkI6dmJoYWd3YW4tc2NjMDkucGRmAAAOACYAEgB2AGIAaABhAGcAdwBhAG4ALQBzAGMAYwAwADkALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADVVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9CL3ZiaGFnd2FuLXNjYzA5LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAVAAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHC}}
@inproceedings{wacha:fast10poster,
address = {San Jose, CA},
author = {Rosie Wacha and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Poster Session at the Conference on File and Storage Technology (FAST 2010)},
date-added = {2019-12-27 10:40:59 -0800},
date-modified = {2019-12-27 10:43:18 -0800},
keywords = {shortpapers, flash, RAID},
month = {February 24-27},
title = {RAID4S: Adding SSDs to RAID Arrays},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1cvd2FjaGEtZmFzdDEwcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZ3YWNoYS1mYXN0MTBwb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iuQ3AAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABVwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpXOndhY2hhLWZhc3QxMHBvc3Rlci5wZGYAAA4ALgAWAHcAYQBjAGgAYQAtAGYAYQBzAHQAMQAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvVy93YWNoYS1mYXN0MTBwb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-url-1 = {http://users.soe.ucsc.edu/~carlosm/Papers/S11.pdf}}
@inproceedings{ames:fast10poster,
address = {San Jose, CA},
author = {Sasha Ames and Maya B. Gokhale and Carlos Maltzahn},
booktitle = {Poster Session at the Conference on File and Storage Technology (FAST 2010)},
date-added = {2019-12-26 20:23:07 -0800},
date-modified = {2019-12-29 16:32:23 -0800},
keywords = {shortpapers, filesystems, linking, metadata},
month = {February 24-27},
title = {Design and Implementation of a Metadata-Rich File System},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1mYXN0MTBwb3N0ZXIucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWFtZXMtZmFzdDEwcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKsejAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW1lcy1mYXN0MTBwb3N0ZXIucGRmAA4ALAAVAGEAbQBlAHMALQBmAGEAcwB0ADEAMABwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1mYXN0MTBwb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{polte:pdsw10poster,
address = {New Orleans, LA},
author = {Milo Polte and Esteban Molina-Estolano and John Bent and Garth Gibson and Carlos Maltzahn and Maya B. Gokhale and Scott Brandt},
booktitle = {Poster Session at 5th Petascale Data Storage Workshop (PDSW 2010), co-located with Supercomputing 2010},
date-added = {2019-12-26 20:08:27 -0800},
date-modified = {2019-12-29 16:32:38 -0800},
keywords = {shortpapers, parallel, filesystems, cloudcomputing},
month = {November 15},
title = {PLFS and HDFS: Enabling Parallel Filesystem Semantics In The Cloud},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZwb2x0ZS1wZHN3MTBwb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2irETwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUAAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpQOnBvbHRlLXBkc3cxMHBvc3Rlci5wZGYAAA4ALgAWAHAAbwBsAHQAZQAtAHAAZABzAHcAMQAwAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9wb2x0ZS1wZHN3MTBwb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dcG9sdGUtcGRzdzEwcG9zdGVyLXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqxJIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpwb2x0ZS1wZHN3MTBwb3N0ZXItcG9zdGVyLnBkZgAOADwAHQBwAG8AbAB0AGUALQBwAGQAcwB3ADEAMABwAG8AcwB0AGUAcgAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUC9wb2x0ZS1wZHN3MTBwb3N0ZXItcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=},
bdsk-file-3 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGZPEQGKAAAAAAGKAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8acG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqxLsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVAAAAIAPy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UDpwb2x0ZS1wZHN3MTBwb3N0ZXItd2lwLnBkZgAADgA2ABoAcABvAGwAdABlAC0AcABkAHMAdwAxADAAcABvAHMAdABlAHIALQB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1AvcG9sdGUtcGRzdzEwcG9zdGVyLXdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB6g==}}
@inproceedings{ames:pdsw10poster,
address = {New Orleans, LA},
author = {Sasha Ames and Maya B. Gokhale and Carlos Maltzahn},
booktitle = {Poster Session at 5th Petascale Data Storage Workshop (PDSW 2010), co-located with Supercomputing 2010},
date-added = {2019-12-26 20:05:01 -0800},
date-modified = {2019-12-29 16:32:49 -0800},
keywords = {shortpapers, linking, filesystems, metadata},
month = {November 15},
title = {QMDS: A File System Metadata Service Supporting a Graph Data Model-Based Query Language},
year = {2010},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1wZHN3MTBwb3N0ZXIucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWFtZXMtcGRzdzEwcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKsNwAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFBAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkE6YW1lcy1wZHN3MTBwb3N0ZXIucGRmAA4ALAAVAGEAbQBlAHMALQBwAGQAcwB3ADEAMABwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0EvYW1lcy1wZHN3MTBwb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{skourtis:fast13wip,
address = {San Jose, CA},
author = {Dimitris Skourtis and Scott A. Brandt and Carlos Maltzahn},
booktitle = {Work-in-Progress and Poster Session at the Conference on File and Storage Technology (FAST 2013)},
date-added = {2019-12-26 19:57:02 -0800},
date-modified = {2019-12-29 16:34:24 -0800},
keywords = {shortpapers, performance, predictable, flash, redundancy},
month = {February 12-15},
title = {High Performance \& Low Latency in Solid-State Drives Through Redundancy},
year = {2013},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2tvdXJ0aXMtZmFzdDEzd2lwLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZza291cnRpcy1mYXN0MTN3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2irBvQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNrb3VydGlzLWZhc3QxM3dpcC5wZGYAAA4ALgAWAHMAawBvAHUAcgB0AGkAcwAtAGYAYQBzAHQAMQAzAHcAaQBwAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUy9za291cnRpcy1mYXN0MTN3aXAucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2tvdXJ0aXMtZmFzdDEzd2lwLXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dc2tvdXJ0aXMtZmFzdDEzd2lwLXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqwfcAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6Uzpza291cnRpcy1mYXN0MTN3aXAtcG9zdGVyLnBkZgAOADwAHQBzAGsAbwB1AHIAdABpAHMALQBmAGEAcwB0ADEAMwB3AGkAcAAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvUy9za291cnRpcy1mYXN0MTN3aXAtcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{lofstead:cluster14poster,
address = {Madrid, Spain},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn and Quincey Koziol and John Bent and Eric Barton},
booktitle = {Poster Session at IEEE Cluster 2014},
date-added = {2019-12-26 19:23:07 -0800},
date-modified = {2019-12-29 16:34:56 -0800},
keywords = {shortpapers, storage, parallel, hpc, exascale},
month = {September 22-26},
title = {An Innovative Storage Stack Addressing Extreme Scale Platforms and Big Data Applications},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA4Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtY2x1c3RlcjE0LXBvc3Rlci5wZGZPEQGUAAAAAAGUAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8dbG9mc3RlYWQtY2x1c3RlcjE0LXBvc3Rlci5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9/k+CEAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAQi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1jbHVzdGVyMTQtcG9zdGVyLnBkZgAOADwAHQBsAG8AZgBzAHQAZQBhAGQALQBjAGwAdQBzAHQAZQByADEANAAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBAVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sb2ZzdGVhZC1jbHVzdGVyMTQtcG9zdGVyLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABfAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAfc=}}
@inproceedings{sevilla:fast14wip,
address = {San Jose, CA},
author = {Michael Sevilla and Scott Brandt and Carlos Maltzahn and Ike Nassi and Sam Fineberg},
booktitle = {Work-in-Progress and Poster Session at the 12th USENIX Conference on File and Storage Technology (FAST 2014)},
date-added = {2019-12-26 19:20:27 -0800},
date-modified = {2019-12-29 16:35:02 -0800},
keywords = {shortpapers, filesystems, metadata, loadbalancing},
month = {February 17-20},
title = {Exploring Resource Migration using the CephFS Metadata cluster},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA0Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1mYXN0MTQtcG9zdGVyLnBkZk8RAYQAAAAAAYQAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xlzZXZpbGxhLWZhc3QxNC1wb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////3+T2JgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA+LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtZmFzdDE0LXBvc3Rlci5wZGYADgA0ABkAcwBlAHYAaQBsAGwAYQAtAGYAYQBzAHQAMQA0AC0AcABvAHMAdABlAHIALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADxVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtZmFzdDE0LXBvc3Rlci5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAWwAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHj}}
@inproceedings{kufeldt:fast18wip,
address = {Oakland, CA},
author = {Philip Kufeldt and Timothy Feldman and Christine Green and Grant Mackey and Carlos Maltzahn and Shingo Tanaka},
booktitle = {WiP and Poster Sessions at 16th USENIX Conference on File and Storage Technologies (FAST'18)},
date-added = {2019-12-26 19:17:05 -0800},
date-modified = {2019-12-29 16:35:11 -0800},
keywords = {shortpapers, eusocial, embedded, storage},
month = {February 12-15},
title = {Eusocial Storage Devices},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmTxEBjAAAAAABjAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////G2t1ZmVsZC1mYXN0MTh3aXAtcG9zdGVyLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Wp7P3AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFLAAACAEAvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOks6a3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmAA4AOAAbAGsAdQBmAGUAbABkAC0AZgBhAHMAdAAxADgAdwBpAHAALQBwAG8AcwB0AGUAcgAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPlVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkLWZhc3QxOHdpcC1wb3N0ZXIucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAF0AAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB7Q==}}
@inproceedings{jimenez:xldb18,
address = {Stanford, CA},
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {Lightning Talk and Poster Session at the 11th Extremely Large Databases Conference (XLDB)},
date-added = {2019-12-26 19:14:42 -0800},
date-modified = {2019-12-29 16:35:19 -0800},
keywords = {shortpapers, reproducibility},
month = {April 30},
title = {Reproducible Computational and Data-Intensive Experimentation Pipelines with Popper},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA2Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXhsZGIxOC1zbGlkZXMucGRmTxEBigAAAAABigACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GWppbWVuZXoteGxkYjE4LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////f5PLgAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAQC86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXoteGxkYjE4LXNsaWRlcy5wZGYADgA0ABkAagBpAG0AZQBuAGUAegAtAHgAbABkAGIAMQA4AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD5Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei14bGRiMTgtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABdAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAes=},
bdsk-url-1 = {https://www.youtube.com/watch?v=HXk_nVq8D00&list=PLE1UFlsTj5AHNXntohlhH9nYgXGU2ZqOU&index=32}}
@inproceedings{maltzahn:hotstorage18-breakout,
address = {Boston, MA},
author = {Carlos Maltzahn},
booktitle = {Breakout Session abstract at 10th USENIX Workshop on Hot Topics in Storage and File Systems (HotStorage'18, co-located with USENIX ATC'18)},
date-added = {2019-12-26 19:10:01 -0800},
date-modified = {2020-01-19 16:20:17 -0800},
keywords = {shortpapers, storage, embedded, eusocial, programmable},
month = {July 9-10},
title = {Should Storage Devices Stay Dumb or Become Smart?},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA9Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4taG90c3RvcmFnZTE4LWJyZWFrb3V0LnBkZk8RAaoAAAAAAaoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////x9tYWx0emFobi1ob3RzdG9yYWcjRkZGRkZGRkYucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iq2rgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTQAAAgBHLzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpNOm1hbHR6YWhuLWhvdHN0b3JhZ2UxOC1icmVha291dC5wZGYAAA4ARgAiAG0AYQBsAHQAegBhAGgAbgAtAGgAbwB0AHMAdABvAHIAYQBnAGUAMQA4AC0AYgByAGUAYQBrAG8AdQB0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgBFVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYWx0emFobi1ob3RzdG9yYWdlMTgtYnJlYWtvdXQucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABkAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAhI=},
bdsk-url-1 = {https://docs.google.com/presentation/d/1yvXWpxfNWZ4NIL9GLLWM_e3TAm-8Mu-EfAygo1SRRlg/edit?usp=sharing},
bdsk-url-2 = {https://docs.google.com/document/d/1Vfuoy2H8Mg2PrweO5I2sP04gAZonhUIxE3_W9oMFhwI/edit?usp=sharing}}
@inproceedings{kufeldt:fast19poster,
address = {Boston, MA},
author = {Philip Kufeldt and Jianshen Liu and Carlos Maltzahn},
booktitle = {Poster Session at 17th USENIX Conference on File and Storage Technologies (FAST'19)},
date-added = {2019-12-26 19:07:25 -0800},
date-modified = {2019-12-29 16:35:40 -0800},
keywords = {shortpapers, reproducibility, embedded, storage, eusocial},
month = {February 25-28},
title = {MBWU (MibeeWu): Quantifying benefits of offloading data management to storage devices},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxBTLi4vLi4vLi4vTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9rdWZlbGR0LWZhc3QxOXdpcC5wZGZPEQHMAAAAAAHMAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Va3VmZWxkdC1mYXN0MTl3aXAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9hkMrEAAAAAAAAAAAADAAYAAAogY3UAAAAAAAAAAAAAAAAAEjIwMTkwMTE1IEZBU1QxOVdJUAACAF0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6U3VibWlzc2lvbnM6YXJjaGl2ZToyMDE5OjIwMTkwMTE1IEZBU1QxOVdJUDprdWZlbGR0LWZhc3QxOXdpcC5wZGYAAA4ALAAVAGsAdQBmAGUAbABkAHQALQBmAGEAcwB0ADEAOQB3AGkAcAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAW1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9rdWZlbGR0LWZhc3QxOXdpcC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAHoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAACSg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxCbLi4vLi4vLi4vTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9RdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGZPEQLsAAAAAALsAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8fUXVhbnRpZnlpbmcgYmVuZWZpI0ZGRkZGRkZGLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9kHWegAAAAAAAAAAAADAAYAAAogY3UAAAAAAAAAAAAAAAAAEjIwMTkwMTE1IEZBU1QxOVdJUAACAKUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6U3VibWlzc2lvbnM6YXJjaGl2ZToyMDE5OjIwMTkwMTE1IEZBU1QxOVdJUDpRdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGYAAA4AvABdAFEAdQBhAG4AdABpAGYAeQBpAG4AZwAgAGIAZQBuAGUAZgBpAHQAcwAgAG8AZgAgAG8AZgBmAGwAbwBhAGQAaQBuAGcAIABkAGEAdABhACAAbQBhAG4AYQBnAGUAbQBlAG4AdAAgAHQAbwAgAHMAdABvAHIAYQBnAGUAIABkAGUAdgBpAGMAZQBzACAAKABQAG8AcwB0AGUAcgApACAAKABGAEEAUwBUACAAJwAxADkAKQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAo1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvU3VibWlzc2lvbnMvYXJjaGl2ZS8yMDE5LzIwMTkwMTE1IEZBU1QxOVdJUC9RdWFudGlmeWluZyBiZW5lZml0cyBvZiBvZmZsb2FkaW5nIGRhdGEgbWFuYWdlbWVudCB0byBzdG9yYWdlIGRldmljZXMgKFBvc3RlcikgKEZBU1QgJzE5KS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAMIAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAADsg==}}
@inproceedings{lefevre:vault20,
address = {Santa Clara, CA},
author = {Jeff LeFevre and Carlos Maltzahn},
booktitle = {2020 Linux Storage and Filesystems Conference (Vault'20, co-located with FAST'20 and NSDI'20)},
date-added = {2019-12-26 19:04:52 -0800},
date-modified = {2020-07-01 12:40:06 -0700},
keywords = {shortpapers, programmable, storage, physicaldesign, nsf1836650, nsf1764102, nsf1705021},
month = {February 24-25},
title = {Scaling databases and file APIs with programmable Ceph object storage},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDIwLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsZWZldnJlLXZhdWx0MjAucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2oBm3wAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxlZmV2cmUtdmF1bHQyMC5wZGYADgAoABMAbABlAGYAZQB2AHIAZQAtAHYAYQB1AGwAdAAyADAALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xlZmV2cmUtdmF1bHQyMC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@article{ellis:jbcs94,
author = {Clarence E. Ellis and Carlos Maltzahn},
date-added = {2019-12-26 18:50:02 -0800},
date-modified = {2019-12-26 18:51:29 -0800},
journal = {Journal of the Brazilian Computer Society, Special Edition on CSCW},
keywords = {papers, cscw},
number = {1},
pages = {15--23},
title = {Collaboration with Spreadsheets},
volume = {1},
year = {1994},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAtLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0UtRi9lbGxpcy1qYmNzOTQucGRmTxEBaAAAAAABaAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////EGVsbGlzLWpiY3M5NC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKrG7AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANFLUYAAAIANy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RS1GOmVsbGlzLWpiY3M5NC5wZGYAAA4AIgAQAGUAbABsAGkAcwAtAGoAYgBjAHMAOQA0AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA1VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRS1GL2VsbGlzLWpiY3M5NC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFQAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABwA==}}
@article{jimenez:tinytocs16,
abstract = {Validating experimental results in the field of computer systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. Determining if an experiment is reproducible entails two separate tasks: re-executing the experiment and validating the results. Existing reproducibility efforts have focused on the former, envisioning techniques and infrastructures that make it easier to re-execute an experiment. By focusing on the latter and analyzing the validation workflow that an experiment re-executioner goes through, we notice that validating results is done on the basis of experiment design and high-level goals, rather than exact quantitative metrics.
Based on this insight, we introduce a declarative format for describing the high-level components of an experiment, as well as a language for specifying generic, testable statements that serve as the basis for validation [1,2]. Our language allows to express and validate statements on top of metrics gathered at runtime. We demonstrate the feasibility of this approach by taking an experiment from an already published article and obtain the corresponding experiment specification. We show that, if we had this specification in the first place, validating the original findings would be an almost entirely automated task. If we contrast this with the current state of our practice, where it takes days or weeks (if successful) to reproduce results, we see how making experiment specifications available as part of a publication or as addendum to experimental results can significantly aid in the validation of computer systems research.
Acknowledgements: Work performed under auspices of US DOE by LLNL contract DE-AC52- 07NA27344 ABS-684863 and by SNL contract DE-AC04-94AL85000.},
author = {Ivo Jimenez and Carlos Maltzahn and Jay Lofstead and Adam Moody and Kathryn Mohror and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
date-added = {2019-12-26 18:43:34 -0800},
date-modified = {2020-01-04 21:15:26 -0700},
journal = {Tiny Transactions on Computer Science (TinyToCS)},
keywords = {papers, reproducibility, evaluation},
title = {I Aver: Providing Declarative Experiment Specifications Facilitates the Evaluation of Computer Systems Research},
volume = {4},
year = {2016},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXRpbnl0b2NzMTYucGRmTxEBgAAAAAABgAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FmppbWVuZXotdGlueXRvY3MxNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKrBKAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAPS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotdGlueXRvY3MxNi5wZGYAAA4ALgAWAGoAaQBtAGUAbgBlAHoALQB0AGkAbgB5AHQAbwBjAHMAMQA2AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA7VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotdGlueXRvY3MxNi5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFoAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB3g==}}
@inproceedings{maltzahn:vkika91,
abstract = {Die meisten CAD-Umgebungen betonen die Unterst{\"u}tzung einzelner Arbeitspl{\"a}tze und helfen nur sekund{\"a}r bei deren Kooperation. Wir schlagen einen umgekehrten Ansatz vor: Entw{\"u}rfe entstehen im Rahmen von interagierenden Sharing-Prozessen, die den gemeinsamen Zugang aller Beteiligten zu Konzepten, Aufgaben und Ergebnissen strukturieren. Dieser Ansatz und seine Konsequenzen werden am Beispiel des Software Engineering dargestellt. Auf der Basis einer Formalisierung dieser Prozesse steuert der ConceptTalk-Prototyp eine verteilte Softwareumgebung und spezielle Kommunikationswerkzeuge {\"u}ber das Wissensbanksystem ConceptBase. Erfahrungen mit ConceptTalk unterst{\"u}tzen ein neues Paradigma, das ein Informationssystem als Medium f{\"u}r komplexe Kommunikation betrachtet.},
author = {Carlos Maltzahn and Thomas Rose},
booktitle = {Verteilte K{\"u}nstliche Intelligenz und kooperatives Arbeiten},
date-added = {2019-12-26 18:32:03 -0800},
date-modified = {2020-01-04 21:16:07 -0700},
editor = {W. Brauer and D. Hern{\'a}ndez},
keywords = {papers, cscw, softwareengineering},
pages = {195--206},
publisher = {Springer-Verlag Berlin Heidelberg},
title = {ConceptTalk: Kooperationsunterst{\"u}tzung in Softwareumgebungen},
volume = {291},
year = {1991},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tdmtpa2E5MS5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbWFsdHphaG4tdmtpa2E5MS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqrXUAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYWx0emFobi12a2lrYTkxLnBkZgAADgAqABQAbQBhAGwAdAB6AGEAaABuAC0AdgBrAGkAawBhADkAMQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL00vbWFsdHphaG4tdmtpa2E5MS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{leung:msst07,
abstract = {Achieving performance, reliability, and scalability presents a unique set of challenges for large distributed storage. To identify problem areas, there must be a way for developers to have a comprehensive view of the entire storage system. That is, users must be able to understand both node specific behavior and complex relationships between nodes. We present a distributed file system profiling method that supports such analysis. Our approach is based on combining node-specific metrics into a single cohesive system image. This affords users two views of the storage system: a micro, per-node view, as well as, a macro, multi-node view, allowing both node-specific and complex inter-nodal problems to be debugged. We visualize the storage system by displaying nodes and intuitively animating their metrics and behavior allowing easy analysis of complex problems.},
address = {Santa Clara, CA},
author = {Andrew Leung and Eric Lalonde and Jacob Telleen and James Davis and Carlos Maltzahn},
booktitle = {Proceedings of the 24th IEEE Conference on Mass Storage Systems and Technologies (MSST 2007)},
date-added = {2019-12-26 18:07:11 -0800},
date-modified = {2020-01-04 21:16:58 -0700},
keywords = {papers, performance, debugging, distributed, storage, systems},
month = {September},
title = {Using Comprehensive Analysis for Performance Debugging in Distributed Storage Systems},
year = {2007},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGV1bmctbXNzdDA3LnBkZk8RAWIAAAAAAWIAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xBsZXVuZy1tc3N0MDcucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqn9gAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA1LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxldW5nLW1zc3QwNy5wZGYAAA4AIgAQAGwAZQB1AG4AZwAtAG0AcwBzAHQAMAA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgAzVXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sZXVuZy1tc3N0MDcucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABSAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbg=}}
@inproceedings{lofstead:pdsw13,
abstract = {The rise of Integrated Application Workflows (IAWs) for processing data prior to storage on persistent media prompts the need to incorporate features that reproduce many of the semantics of persistent storage devices. One such feature is the ability to manage data sets as chunks with natural barriers between different data sets. Towards that end, we need a mechanism to ensure that data moved to an intermediate storage area is both complete and correct before allowing access by other processing components. The Doubly Distributed Transactions (D2T) protocol offers such a mechanism. The initial development [9] suffered from scalability limitations and undue requirements on server processes. The current version has addressed these limitations and has demonstrated scalability with low overhead.},
address = {Denver, CO},
author = {Jay Lofstead and Jai Dayal and Ivo Jimenez and Carlos Maltzahn},
booktitle = {8th Parallel Data Storage Workshop at Supercomputing '13 (PDSW 2013)},
date-added = {2019-12-26 16:21:31 -0800},
date-modified = {2020-01-04 21:17:41 -0700},
keywords = {papers, transactions, datamanagement, hpc},
month = {November 18},
title = {Efficient Transactions for Parallel Data Movement},
year = {2013},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtcGRzdzEzLnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNsb2ZzdGVhZC1wZHN3MTMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iql+wAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxvZnN0ZWFkLXBkc3cxMy5wZGYADgAoABMAbABvAGYAcwB0AGUAYQBkAC0AcABkAHMAdwAxADMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9ML2xvZnN0ZWFkLXBkc3cxMy5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{lofstead:iasds14,
abstract = {The DOE Extreme-Scale Technology Acceleration Fast Forward Storage and IO Stack project is going to have significant impact on storage systems design within and beyond the HPC community. With phase 1 of the project complete, it is an excellent opportunity to evaluate many of the decisions made to feed into the phase 2 effort. With this paper we not only provide a timely summary of important aspects of the design specifications but also capture the underlying reasoning that is not available elsewhere.
The initial effort to define a next generation storage system has made admirable contributions in architecture and design. Formalizing the general idea of data staging into burst buffers for the storage system will help manage the performance variability and offer additional data processing opportunities outside the main compute and storage system. Adding a transactional mechanism to manage faults and data visibility helps enable effective analytics without having to work around the IO stack semantics. While these and other contributions are valuable, similar efforts made elsewhere may offer attractive alternatives or differing semantics that could yield a more feature rich environment with little to no additional overhead. For example, the Doubly Distributed Transactions (D2T) protocol offers an alternative approach for incorporating transactional semantics into the data path. Another project, PreDatA, examined how to get the best throughput for data operators and may offer additional insights into further refinements of the Burst Buffer concept.
This paper examines some of the choices made by the Fast Forward team and compares them with other options and offers observations and suggestions based on these other efforts. This will include some non-core contributions of other projects, such as some of the demonstration metadata and data storage components generated while implementing D2T, to make suggestions that may help the next generation design for how the IO stack works as a whole.},
address = {Minneapolis, MN},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn},
booktitle = {Workshop on Interfaces and Architectures for Scientific Data Storage (IASDS 2014)},
date-added = {2019-12-26 16:17:49 -0800},
date-modified = {2020-01-04 23:08:26 -0700},
keywords = {papers, datamanagement, hpc},
month = {September 9-12},
title = {Consistency and Fault Tolerance Considerations for the Next Iteration of the DOE Fast Forward Storage and IO Project},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtaWFzZHMxNC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbG9mc3RlYWQtaWFzZHMxNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqjgAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1pYXNkczE0LnBkZgAADgAqABQAbABvAGYAcwB0AGUAYQBkAC0AaQBhAHMAZABzADEANAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtaWFzZHMxNC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{lofstead:discs14,
abstract = {Scientific simulations are moving away from using centralized persistent storage for intermediate data between workflow steps towards an all online model. This shift is motivated by the relatively slow IO bandwidth growth compared with compute speed increases. The challenges presented by this shift to Integrated Application Workflows are motivated by the loss of persistent storage semantics for node-to-node communication. One step towards addressing this semantics gap is using transactions to logically delineate a data set from 100,000s of processes to 1000s of servers as an atomic unit.
Our previously demonstrated Doubly Distributed Transactions (D2T) protocol showed a high-performance solution, but had not explored how to detect and recover from faults. Instead, the focus was on demonstrating high-performance typical case performance. The research presented here addresses fault detection and recovery based on the enhanced protocol design. The total overhead for a full transaction with multiple operations at 65,536 processes is on average 0.055 seconds. Fault detection and recovery mechanisms demonstrate similar performance to the success case with only the addition of appropriate timeouts for the system. This paper explores the challenges in designing a recoverable protocol for doubly distributed transactions, particularly for parallel computing environments.},
address = {New Orleans, LA},
author = {Jay Lofstead and Jai Dayal and Ivo Jimenez and Carlos Maltzahn},
booktitle = {The 2014 International Workshop on Data-Intensive Scalable Computing Systems (DISCS-2014) (Workshop co-located with Supercomputing 2014)},
date-added = {2019-12-26 16:14:45 -0800},
date-modified = {2020-01-04 21:18:57 -0700},
keywords = {papers, datamanagement, hpc},
month = {November 16},
title = {Efficient, Failure Resilient Transactions for Parallel and Distributed Computing},
year = {2014},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtZGlzY3MxNC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8UbG9mc3RlYWQtZGlzY3MxNC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqjVsAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1kaXNjczE0LnBkZgAADgAqABQAbABvAGYAcwB0AGUAYQBkAC0AZABpAHMAYwBzADEANAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtZGlzY3MxNC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{jimenez:woc15,
abstract = {Evaluating experimental results in the field of computer systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. In this position paper, we analyze salient features of container technology that, if leveraged correctly, can help reduce the complexity of reproducing experiments in systems research. We present a use case in the area of distributed storage systems to illustrate the extensions that we envision, mainly in terms of container management infrastructure. We also discuss the benefits and limitations of using containers as a way of reproducing research in other areas of experimental systems research.},
address = {Tempe, AZ},
author = {Ivo Jimenez and Carlos Maltzahn and Adam Moody and Kathryn Mohror and Jay Lofstead and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
booktitle = {First Workshop on Containers (WoC 2015) (Workshop co-located with IEEE International Conference on Cloud Engineering - IC2E 2015)},
date-added = {2019-12-26 16:08:16 -0800},
date-modified = {2020-01-19 16:41:52 -0800},
keywords = {papers, reproducibility, containers},
month = {March 9-13},
title = {The Role of Container Technology in Reproducible Computer Systems Research},
year = {2015},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXdvYzE1LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFqaW1lbmV6LXdvYzE1LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqMtQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaW1lbmV6LXdvYzE1LnBkZgAOACQAEQBqAGkAbQBlAG4AZQB6AC0AdwBvAGMAMQA1AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2ppbWVuZXotd29jMTUucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==}}
@inproceedings{lofstead:sc16,
abstract = {The DOE Extreme-Scale Technology Acceleration Fast Forward Storage and IO Stack project is going to have significant impact on storage systems design within and beyond the HPC community. With phase two of the project starting, it is an excellent opportunity to explore the complete design and how it will address the needs of extreme scale platforms. This paper examines each layer of the proposed stack in some detail along with cross-cutting topics, such as transactions and metadata management.
This paper not only provides a timely summary of important aspects of the design specifications but also captures the underlying reasoning that is not available elsewhere. We encourage the broader community to understand the design, intent, and future directions to foster discussion guiding phase two and the ultimate production storage stack based on this work. An initial performance evaluation of the early prototype implementation is also provided to validate the presented design.
},
address = {Salt Lake City, UT},
author = {Jay Lofstead and Ivo Jimenez and Carlos Maltzahn and Quincey Koziol and John Bent and Eric Barton},
booktitle = {29th ACM and IEEE International Conference for High Performance Computing, Networking, Storage and Analysis (SC16)},
date-added = {2019-12-26 15:58:41 -0800},
date-modified = {2020-01-04 21:19:51 -0700},
keywords = {papers, parallel, storage, hpc, exascale},
month = {November 13-18},
title = {DAOS and Friends: A Proposal for an Exascale Storage System},
year = {2016},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbG9mc3RlYWQtc2MxNi5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbG9mc3RlYWQtc2MxNi5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9mAdiIAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsb2ZzdGVhZC1zYzE2LnBkZgAOACQAEQBsAG8AZgBzAHQAZQBhAGQALQBzAGMAMQA2AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9sb2ZzdGVhZC1zYzE2LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{jimenez:icpe18,
abstract = {We introduce quiho, a framework for profiling application performance that can be used in automated performance regression tests. quiho profiles an application by applying sensitivity analysis, in particular statistical regression analysis (SRA), using application-independent performance feature vectors that characterize the performance of machines. The result of the SRA, feature importance specifically, is used as a proxy to identify hardware and low-level system software behavior. The relative importance of these features serve as a performance profile of an application (termed inferred resource utilization profile or IRUP), which is used to automatically validate performance behavior across multiple revisions of an application's code base without having to instrument code or obtain performance counters. We demonstrate that quiho can successfully discover performance regressions by showing its effectiveness in profiling application performance for synthetically introduced regressions as well as those found in real-world applications.},
address = {Berlin, Germany},
author = {Ivo Jimenez and Noah Watkins and Michael Sevilla and Jay Lofstead and Carlos Maltzahn},
booktitle = {9th ACM/SPEC International Conference on Performance Engineering (ICPE 2018)},
date-added = {2019-12-26 15:51:19 -0800},
date-modified = {2020-07-01 12:46:23 -0700},
keywords = {papers, reproducibility, performance, testing, cross, sandia, nsf1450488},
month = {April 9-13},
title = {quiho: Automated Performance Regression Testing Using Inferred Resource Utilization Profiles},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWljcGUxOC5wZGZPEQFwAAAAAAFwAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8SamltZW5lei1pY3BlMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9wly2wAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA5LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1pY3BlMTgucGRmAAAOACYAEgBqAGkAbQBlAG4AZQB6AC0AaQBjAHAAZQAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADdVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1pY3BlMTgucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABWAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAco=}}
@inproceedings{jimenez:rescue-hpc18,
abstract = {Advances in agile software delivery methodologies and tools (commonly referred to as DevOps) have not yet materialized in academic scenarios such as university, industry and government laboratories. In this position paper we make the case for Black Swan, a platform for the agile implementation, maintenance and curation of experimentation pipelines by embracing a DevOps approach.},
address = {Dallas, TX},
author = {Ivo Jimenez and Carlos Maltzahn},
booktitle = {1st Workshop on Reproducible, Customizable and Portable Workflows for HPC (ResCuE-HPC'18, co-located with SC'18)},
date-added = {2019-12-26 15:45:05 -0800},
date-modified = {2020-07-01 12:44:44 -0700},
keywords = {papers, reproducibility, cross},
month = {November 11},
title = {Spotting Black Swans With Ease: The Case for a Practical Reproducibility Platform},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXJlc2N1ZS1ocGMxOC5wZGZPEQGIAAAAAAGIAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8YamltZW5lei1yZXNjdWUtaHBjMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9oqhuQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA/LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1yZXNjdWUtaHBjMTgucGRmAAAOADIAGABqAGkAbQBlAG4AZQB6AC0AcgBlAHMAYwB1AGUALQBoAHAAYwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD1Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1yZXNjdWUtaHBjMTgucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABcAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAeg=},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA8Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXJlc2N1ZS1ocGMxOC1zbGlkZXMucGRmTxEBogAAAAABogACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////H2ppbWVuZXotcmVzY3VlLWhwYzE4LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKoc5AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIARi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotcmVzY3VlLWhwYzE4LXNsaWRlcy5wZGYADgBAAB8AagBpAG0AZQBuAGUAegAtAHIAZQBzAGMAdQBlAC0AaABwAGMAMQA4AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAERVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1yZXNjdWUtaHBjMTgtc2xpZGVzLnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABjAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAgk=}}
@inproceedings{liu:iodc19,
abstract = {The storage industry is considering new kinds of storage devices that support data access function offloading, i.e. the ability to perform data access functions on the storage device itself as opposed to performing it on a separate compute system to which the storage device is connected. But what is the benefit of offloading to a storage device that is controlled by an embedded platform, very different from a host platform? To quantify the benefit, we need a measurement methodology that enables apple-to-apple comparisons between different platforms. We propose a Media-based Work Unit (MBWU, pronounced ``MibeeWu''), and an MBWU-based measurement methodology to standardize the platform efficiency evaluation so as to quantify the benefit of offloading. To demonstrate the merit of this methodology, we implemented a prototype to automate quantifying the benefit of offloading the key-value data access function.},
address = {Frankfurt a. M., Germany},
author = {Jianshen Liu and Philip Kufeldt and Carlos Maltzahn},
booktitle = {HPC I/O in the Data Center Workshop (HPC-IODC 2019, co-located with ISC-HPC 2019)},
date-added = {2019-12-26 15:40:05 -0800},
date-modified = {2020-07-01 13:11:21 -0700},
keywords = {papers, reproducibility, performance, embedded, storage, eusocial, cross},
month = {June 20},
title = {MBWU: Benefit Quantification for Data Access Function Offloading},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxApLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS5wZGZPEQFaAAAAAAFaAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8ObGl1LWlvZGMxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9lvmDAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAMy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsaXUtaW9kYzE5LnBkZgAADgAeAA4AbABpAHUALQBpAG8AZABjADEAOQAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAMVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFAAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABrg==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS1zbGlkZXMucGRmTxEBdAAAAAABdAACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////FWxpdS1pb2RjMTktc2xpZGVzLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////aKoXmAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFMAAACADovOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkw6bGl1LWlvZGMxOS1zbGlkZXMucGRmAA4ALAAVAGwAaQB1AC0AaQBvAGQAYwAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAOFVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGl1LWlvZGMxOS1zbGlkZXMucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFcAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzw==}}
@inproceedings{dahlgren:pdsw19,
abstract = {In the post-Moore era, systems and devices with new architectures will arrive at a rapid rate with significant impacts on the software stack. Applications will not be able to fully benefit from new architectures unless they can delegate adapting to new devices in lower layers of the stack. In this paper we introduce physical design management which deals with the problem of identifying and executing transformations on physical designs of stored data, i.e. how data is mapped to storage abstractions like files, objects, or blocks, in order to improve performance. Physical design is traditionally placed with applications, access libraries, and databases, using hardwired assumptions about underlying storage systems. Yet, storage systems increasingly not only contain multiple kinds of storage devices with vastly different performance profiles but also move data among those storage devices, thereby changing the benefit of a particular physical design. We advocate placing physical design management in storage, identify interesting research challenges, provide a brief description of a prototype implementation in Ceph, and discuss the results of initial experiments at scale that are replicable using Cloudlab. These experiments show performance and resource utilization trade-offs associated with choosing different physical designs and choosing to transform between physical designs.},
address = {Denver, CO},
author = {Kathryn Dahlgren and Jeff LeFevre and Ashay Shirwadkar and Ken Iizawa and Aldrin Montana and Peter Alvaro and Carlos Maltzahn},
booktitle = {4th International Parallel Data Systems Workshop (PDSW 2019, co-located with SC'19)},
date-added = {2019-12-26 15:35:44 -0800},
date-modified = {2020-07-01 12:44:17 -0700},
keywords = {papers, programmable, storage, datamanagement, physicaldesign, cross, nsf1836650, nsf1764102, nsf1705021},
month = {November 18},
title = {Towards Physical Design Management in Storage Systems},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0QvZGFobGdyZW4tcGRzdzE5LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNkYWhsZ3Jlbi1wZHN3MTkucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2iqEdAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABRAAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpEOmRhaGxncmVuLXBkc3cxOS5wZGYADgAoABMAZABhAGgAbABnAHIAZQBuAC0AcABkAHMAdwAxADkALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9EL2RhaGxncmVuLXBkc3cxOS5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{uta:nsdi20,
abstract = {Performance variability has been acknowledged as a problem for over a decade by cloud practitioners and performance engineers. Yet, our survey of top systems conferences reveals that the research community regularly disregards variability when running experiments in the cloud. Focusing on networks, we assess the impact of variability on cloud-based big-data workloads by gathering traces from mainstream commercial clouds and private research clouds. Our data collection consists of millions of datapoints gathered while transferring over 9 petabytes of data. We characterize the network variability present in our data and show that, even though commercial cloud providers implement mechanisms for quality-of-service enforcement, variability still occurs, and is even exacerbated by such mechanisms and service provider policies. We show how big-data workloads suffer from significant slowdowns and lack predictability and replicability, even when state-of-the-art experimentation techniques are used. We provide guidelines for practitioners to reduce the volatility of big data performance, making experiments more repeatable.},
address = {Santa Clara, CA},
author = {Alexandru Uta and Alexandru Custura and Dmitry Duplyakin and Ivo Jimenez and Jan Rellermeyer and Carlos Maltzahn and Robert Ricci and Alexandru Iosup},
booktitle = {NSDI '20},
date-added = {2019-12-26 15:33:24 -0800},
date-modified = {2020-07-01 12:48:02 -0700},
keywords = {papers, reproducibility, datacenter, performance, cross, nsf1450488, nsf1705021, nsf1764102, nsf1836650},
month = {February 25-27},
title = {Is Big Data Performance Reproducible in Modern Cloud Networks?},
year = {2020},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1UtVi91dGEtbnNkaTIwLnBkZk8RAWAAAAAAAWAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////w51dGEtbnNkaTIwLnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2mgzfwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADVS1WAAACADUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlUtVjp1dGEtbnNkaTIwLnBkZgAADgAeAA4AdQB0AGEALQBuAHMAZABpADIAMAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAM1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1UtVi91dGEtbnNkaTIwLnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAUgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAG2}}
@inproceedings{lefevre:vault19,
abstract = {Ceph is an open source distributed storage system that is object-based and massively scalable. Ceph provides developers with the capability to create data interfaces that can take advantage of local CPU and memory on the storage nodes (Ceph Object Storage Devices). These interfaces are powerful for application developers and can be created in C, C++, and Lua.
Skyhook is an open source storage and database project in the Center for Research in Open Source Software at UC Santa Cruz. Skyhook uses these capabilities in Ceph to create specialized read/write interfaces that leverage IO and CPU within the storage layer toward database processing and management. Specifically, we develop methods to apply predicates locally as well as additional metadata and indexing capabilities using Ceph's internal indexing mechanism built on top of RocksDB.
Skyhook's approach helps to enable scale-out of a single node database system by scaling out the storage layer. Our results show the performance benefits for some queries indeed scale well as the storage layer scales out.},
address = {Boston, MA},
author = {Jeff LeFevre and Noah Watkins and Michael Sevilla and Carlos Maltzahn},
booktitle = {2019 Linux Storage and Filesystems (Vault'19, co-located with FAST'19)},
date-added = {2019-08-07 17:58:01 -0700},
date-modified = {2020-07-01 12:49:10 -0700},
keywords = {papers, programmable, storage, database, cross, nsf1705021, nsf1764102, nsf1836650},
month = {February 25-26},
title = {Skyhook: Programmable storage for databases},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGZPEQGKAAAAAAGKAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8abGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9nVvz8AAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUwAAAIAPy86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TDpsZWZldnJlLXZhdWx0MTktc2xpZGVzLnBkZgAADgA2ABoAbABlAGYAZQB2AHIAZQAtAHYAYQB1AGwAdAAxADkALQBzAGwAaQBkAGUAcwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAPVVzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0wvbGVmZXZyZS12YXVsdDE5LXNsaWRlcy5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFwAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAAB6g==}}
@inproceedings{david:precs19,
abstract = {Computer network research experiments can be broadly grouped in three categories: simulated, controlled, and real-world experiments. Simulation frameworks, experiment testbeds and measurement tools, respectively, are commonly used as the platforms for carrying out network experiments. In many cases, given the nature of computer network experiments, properly configuring these platforms is a complex and time-consuming task, which makes replicating and validating research results quite challenging. This complexity can be reduced by leveraging tools that enable experiment reproducibility. In this paper, we show how a recently proposed reproducibility tool called Popper facilitates the reproduction of networking experiments. In particular, we detail the steps taken to reproduce results in two published articles that rely on simulations. The outcome of this exercise is a generic workflow for carrying out network simulation experiments. In addition, we briefly present two additional Popper workflows for running experiments on controlled testbeds, as well as studies that gather real-world metrics (all code is publicly available on Github). We close by providing a list of lessons we learned throughout this process.},
author = {Andrea David and Mariette Souppe and Ivo Jimenez and Katia Obraczka and Sam Mansfield and Kerry Veenstra and Carlos Maltzahn},
booktitle = {P-RECS'19},
date-added = {2019-06-25 11:22:58 -0700},
date-modified = {2020-07-01 12:50:12 -0700},
keywords = {papers, reproducibility, networking, experience, cross, nsf1450488, nsf1836650},
month = {June 24},
title = {Reproducible Computer Network Experiments: A Case Study Using Popper},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0QvZGF2aWQtcHJlY3MxOS5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RZGF2aWQtcHJlY3MxOS5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9lvlSUAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUQAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6RDpkYXZpZC1wcmVjczE5LnBkZgAOACQAEQBkAGEAdgBpAGQALQBwAHIAZQBjAHMAMQA5AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvRC9kYXZpZC1wcmVjczE5LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@unpublished{liu:ocpgs19,
author = {Jianshen Liu and Philip Kufeldt and Carlos Maltzahn},
date-added = {2019-05-06 18:39:54 -0700},
date-modified = {2020-07-01 12:51:05 -0700},
keywords = {shortpapers, eusocial, storagemedium, performance, cross},
month = {March 14-15},
note = {Poster at OCP Global Summit 2019},
title = {Quantifying benefits of offloading data management to storage devices},
year = {2019},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAxLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0wvbGl1LW9jcGdzMTktcG9zdGVyLnBkZk8RAXoAAAAAAXoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xZsaXUtb2NwZ3MxOS1wb3N0ZXIucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////2PYw6AAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABTAAAAgA7LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpMOmxpdS1vY3BnczE5LXBvc3Rlci5wZGYAAA4ALgAWAGwAaQB1AC0AbwBjAHAAZwBzADEAOQAtAHAAbwBzAHQAZQByAC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA5VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTC9saXUtb2NwZ3MxOS1wb3N0ZXIucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABYAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAdY=}}
@inproceedings{sevilla:hotstorage18,
abstract = {The file system metadata service is the scalability bottleneck for many of today's workloads. Common approaches for attacking this ``metadata scaling wall'' include: caching inodes on clients and servers, caching parent inodes for path traversal, and dynamic caching policies that exploit workload locality. These caches reduce the number of remote procedure calls (RPCs) but the effectiveness is dependent on the overhead of maintaining cache coherence and the administrator's ability to select the best cache size for the given workloads. Recent work reduces the number of metadata RPCs to 1 without using a cache at all, by letting clients ``decouple'' the subtrees from the global namespace so that they can do metadata operations locally. Even with this technique, we show that file system metadata is still a bottleneck because namespaces for today's workloads can be very large. The size is problematic for reads because metadata needs to be transferred and materialized.
The management techniques for file system metadata assume that namespaces have no structure but we observe that this is not the case for all workloads. We propose Tintenfisch, a file system that allows users to succinctly express the structure of the metadata they intend to create. If a user can express the structure of the namespace, Tintenfisch clients and servers can (1) compact metadata, (2) modify large namespaces more quickly, and (3) generate only relevant parts of the namespace. This reduces network traffic, storage footprints, and the number of overall metadata operations needed to complete a job.},
address = {Boston, MA},
annote = {Submitted to HotStorage'18},
author = {Michael A. Sevilla and Reza Nasirigerdeh and Carlos Maltzahn and Jeff LeFevre and Noah Watkins and Peter Alvaro and Margaret Lawson and Jay Lofstead and Jim Pivarski},
booktitle = {HotStorage '18},
date-added = {2018-09-04 00:39:56 -0700},
date-modified = {2020-07-01 12:53:25 -0700},
keywords = {papers, metadata, filesystems, scalable, naming, cross, doeDE-SC0016074, nsf1450488, nsf1705021},
month = {July 9-10},
title = {Tintenfisch: File System Namespace Schemas and Generators},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAzLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1ob3RzdG9yYWdlMTgucGRmTxEBggAAAAABggACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////GHNldmlsbGEtaG90c3RvcmFnZTE4LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Xs4gIAAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAAFTAAACAD0vOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOlM6c2V2aWxsYS1ob3RzdG9yYWdlMTgucGRmAAAOADIAGABzAGUAdgBpAGwAbABhAC0AaABvAHQAcwB0AG8AcgBhAGcAZQAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADtVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtaG90c3RvcmFnZTE4LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAWgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHg}}
@inproceedings{maricq:osdi18,
abstract = {The performance of compute hardware varies: software run repeatedly on the same server (or a different server with supposedly identical parts) can produce performance results that differ with each execution. This variation has important effects on the reproducibility of systems research and ability to quantitatively compare the performance of different systems. It also has implications for commercial computing, where agreements are often made conditioned on meeting specific performance targets.
Over a period of 10 months, we conducted a large-scale study capturing nearly 900,000 data points from 835 servers. We examine this data from two perspectives: that of a service provider wishing to offer a consistent environment, and that of a systems researcher who must understand how variability impacts experimental results. From this examination, we draw a number of lessons about the types and magnitudes of performance variability and the effects on confidence in experiment results. We also create a statistical model that can be used to understand how representative an individual server is of the general population. The full dataset and our analysis tools are publicly available, and we have built a system to interactively explore the data and make recommendations for experiment parameters based on statistical analysis of historical data.},
address = {Carlsbad, CA},
author = {Aleksander Maricq and Dmitry Duplyakin and Ivo Jimenez and Carlos Maltzahn and Ryan Stutsman and Robert Ricci},
booktitle = {13th USENIX Symposium on Operating Systems Design and Implementation (OSDI'18)},
date-added = {2018-07-21 02:10:24 +0000},
date-modified = {2020-07-01 12:54:52 -0700},
keywords = {papers, performance, statistics, cloud, reproducibility, systems, nsf1450488, cross},
month = {October 8-10},
title = {Taming Performance Variability},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL00vbWFyaWNxLW9zZGkxOC5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RbWFyaWNxLW9zZGkxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9fT1NAAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAU0AAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6TTptYXJpY3Etb3NkaTE4LnBkZgAOACQAEQBtAGEAcgBpAGMAcQAtAG8AcwBkAGkAMQA4AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvTS9tYXJpY3Etb3NkaTE4LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{sevilla:ccgrid18,
abstract = {Our analysis of the key-value activity generated by the ParSplice molecular dynamics simulation demonstrates the need for more complex cache management strategies. Baseline measurements show clear key access patterns and hot spots that offer significant opportunity for optimization. We use the data management language and policy engine from the Mantle system to dynamically explore a variety of techniques, ranging from basic algorithms and heuristics to statistical models, calculus, and machine learning. While Mantle was originally designed for distributed file systems, we show how the collection of abstractions effectively decomposes the problem into manageable policies for a different application and storage system. Our exploration of this space results in a dynamically sized cache policy that does not sacrifice any performance while using 32-66% less memory than the default ParSplice configuration.},
address = {Washington, DC},
author = {Michael A. Sevilla and Carlos Maltzahn and Peter Alvaro and Reza Nasirigerdeh and Bradley W. Settlemyer and Danny Perez and David Rich and Galen M. Shipman},
booktitle = {CCGRID '18},
date-added = {2018-07-01 21:56:37 +0000},
date-modified = {2020-07-01 12:57:24 -0700},
keywords = {papers, caching, programmable, storage, hpc, doeDE-SC0016074, cross},
month = {May 1-4},
title = {Programmable Caches with a Data Management Language \& Policy Engine},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1jY2dyaWQxOC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Uc2V2aWxsYS1jY2dyaWQxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9ezkIQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UzpzZXZpbGxhLWNjZ3JpZDE4LnBkZgAADgAqABQAcwBlAHYAaQBsAGwAYQAtAGMAYwBnAHIAaQBkADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1jY2dyaWQxOC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{sevilla:precs18,
abstract = {We describe the four publications we have tried to make reproducible and discuss how each paper has changed our workflows, practices, and collaboration policies. The fundamental insight is that paper artifacts must be made reproducible from the start of the project; artifacts are too difficult to make reproducible when the papers are (1) already published and (2) authored by researchers that are not thinking about reproducibility. In this paper, we present the best practices adopted by our research laboratory, which was sculpted by the pitfalls we have identified for the Popper convention. We conclude with a ``call-to-arms'' for the community focused on enhancing reproducibility initiatives for academic conferences, industry environments, and national laboratories. We hope that our experiences will shape a best practices guide for future reproducible papers.},
address = {Tempe, AZ},
author = {Michael A. Sevilla and Carlos Maltzahn},
booktitle = {P-RECS'18},
date-added = {2018-06-12 17:20:57 +0000},
date-modified = {2020-07-01 12:57:49 -0700},
keywords = {papers, reproducibility, experience, cross, nsf1450488},
month = {June 11},
title = {{Popper} Pitfalls: Experiences Following a Reproducibility Convention},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1wcmVjczE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNzZXZpbGxhLXByZWNzMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////10VPrQAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtcHJlY3MxOC5wZGYADgAoABMAcwBlAHYAaQBsAGwAYQAtAHAAcgBlAGMAcwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtcHJlY3MxOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@article{kufeldt:login18,
abstract = {As storage devices get faster, data management tasks rob the host of CPU cycles and DDR bandwidth. In this article, we examine a new interface to storage devices that can leverage existing and new CPU and DRAM resources to take over data management tasks like availability, recovery, and migrations. This new interface provides a roadmap for device-to-device interactions and more powerful storage devices capable of providing in-store compute services that can dramatically improve performance. We call such storage devices ``eusocial'' because we are inspired by eusocial insects like ants, termites, and bees, which as individuals are primitive but collectively accomplish amazing things.
},
author = {Philip Kufeldt and Carlos Maltzahn and Tim Feldman and Christine Green and Grant Mackey and Shingo Tanaka},
date-added = {2018-06-06 16:06:14 +0000},
date-modified = {2020-07-01 12:58:56 -0700},
journal = {;login: The USENIX Magazine},
keywords = {papers, storage, devices, networking, flash, offloading, cross},
number = {2},
pages = {16--22},
title = {Eusocial Storage Devices -- Offloading Data Management to Storage Devices that Can Act Collectively},
volume = {43},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0sva3VmZWxkdC1sb2dpbjE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNrdWZlbGR0LWxvZ2luMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////13fyGAAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABSwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpLOmt1ZmVsZHQtbG9naW4xOC5wZGYADgAoABMAawB1AGYAZQBsAGQAdAAtAGwAbwBnAGkAbgAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9LL2t1ZmVsZHQtbG9naW4xOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{jimenez:pdsw15,
abstract = {Validating experimental results in the field of storage systems is a challenging task, mainly due to the many changes in software and hardware that computational environments go through. Determining if an experiment is reproducible entails two separate tasks: re-executing the experiment and validating the results. Existing reproducibility efforts have focused on the former, envisioning techniques and infrastructures that make it easier to re-execute an experiment. In this position paper, we focus on the latter by analyzing the validation workflow that an experiment re-executioner goes through. We notice that validating results is done on the basis of experiment design and high-level goals, rather than exact quantitative metrics. Based on this insight, we introduce a declarative format for specifying the high-level components of an experiment as well as describing generic, testable conditions that serve as the basis for validation. We present a use case in the area of distributed storage systems to illustrate the usefulness of this approach.},
address = {Austin, TX},
author = {Ivo Jimenez and Carlos Maltzahn and Jay Lofstead and Kathryn Mohror and Adam Moody and Remzi Arpaci-Dusseau and Andrea Arpaci-Dusseau},
booktitle = {PDSW'15},
date-added = {2018-05-15 06:28:35 +0000},
date-modified = {2020-01-04 23:42:08 -0700},
keywords = {papers, reproducibility, declarative},
month = {November 15},
title = {Tackling the Reproducibility Problem in Storage Systems Research with Declarative Experiment Specifications},
year = {2015},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LXBkc3cxNS5wZGZPEQFwAAAAAAFwAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8SamltZW5lei1wZHN3MTUucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9cfy+sAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA5LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6amltZW5lei1wZHN3MTUucGRmAAAOACYAEgBqAGkAbQBlAG4AZQB6AC0AcABkAHMAdwAxADUALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADdVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1wZHN3MTUucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABWAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAco=}}
@techreport{sevilla:ucsctr18,
address = {Santa Cruz, CA},
annote = {Submitted to HotStorage'18},
author = {Michael A. Sevilla and Reza Nasirigerdeh and Carlos Maltzahn and Jeff LeFevre and Noah Watkins and Peter Alvaro and Margaret Lawson and Jay Lofstead and Jim Pivarski},
date-added = {2018-04-08 04:09:23 +0000},
date-modified = {2018-04-08 04:13:07 +0000},
institution = {UC Santa Cruz},
keywords = {papers, metadata, filesystems, scalable, naming},
month = {April 7},
number = {UCSC-SOE-18-08},
title = {{Tintenfisch}: File System Namespace Schemas and Generators},
type = {Tech. rept.},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAvLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS11Y3NjdHIxOC5wZGZPEQFyAAAAAAFyAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8Uc2V2aWxsYS11Y3NjdHIxOC5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9bu4/kAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAVMAAAIAOS86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6UzpzZXZpbGxhLXVjc2N0cjE4LnBkZgAADgAqABQAcwBlAHYAaQBsAGwAYQAtAHUAYwBzAGMAdAByADEAOAAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAN1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS11Y3NjdHIxOC5wZGYAABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFYAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABzA==}}
@inproceedings{jia:hipc17,
abstract = {Accessing external resources (e.g., loading input data, checkpointing snapshots, and out-of-core processing) can have a significant impact on the performance of applications. However, no existing programming systems for high-performance computing directly manage and optimize external accesses. As a result, users must explicitly manage external accesses alongside their computation at the application level, which can result in both correctness and performance issues.
We address this limitation by introducing Iris, a task-based programming model with semantics for external resources. Iris allows applications to describe their access requirements to external resources and the relationship of those accesses to the computation. Iris incorporates external I/O into a deferred execution model, reschedules external I/O to overlap I/O with computation, and reduces external I/O when possible. We evaluate Iris on three microbenchmarks representative of important workloads in HPC and a full combustion simulation, S3D. We demonstrate that the Iris implementation of S3D reduces the external I/O overhead by up to 20x, compared to the Legion and the Fortran implementations.},
address = {Jaipur, India},
author = {Zhihao Jia and Sean Treichler and Galen Shipman and Michael Bauer and Noah Watkins and Carlos Maltzahn and Pat McCormick and Alex Aiken},
booktitle = {HiPC 2017},
date-added = {2018-04-03 18:26:23 +0000},
date-modified = {2020-07-01 12:59:49 -0700},
keywords = {papers, runtime, distributed, programming, storage, cross, doeDE-SC0016074, nsf1450488},
month = {December 18-21},
title = {Integrating External Resources with a Task-Based Programming Model},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxArLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaWEtaGlwYzE3LnBkZk8RAWAAAAAAAWAAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////w5qaWEtaGlwYzE3LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17ONigAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADUvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjpqaWEtaGlwYzE3LnBkZgAADgAeAA4AagBpAGEALQBoAGkAcABjADEANwAuAHAAZABmAA8AGgAMAE0AYQBjAGkAbgB0AG8AcwBoACAASABEABIAM1VzZXJzL2Nhcmxvc21hbHQvTXkgRHJpdmUvUGFwZXJzL0ktSi9qaWEtaGlwYzE3LnBkZgAAEwABLwAAFQACABH//wAAAAgADQAaACQAUgAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAG2}}
@inproceedings{sevilla:ipdps18,
abstract = {HPC and data center scale application developers are abandoning POSIX IO because file system metadata synchronization and serialization overheads of providing strong consistency and durability are too costly -- and often unnecessary -- for their applications. Unfortunately, designing file systems with weaker consistency or durability semantics excludes applications that rely on stronger guarantees, forcing developers to re-write their applications or deploy them on a different system. We present a framework and API that lets administrators specify their consistency/durability requirements and dynamically assign them to subtrees in the same namespace, allowing administrators to optimize subtrees over time and space for different workloads. We show similar speedups to related work but more importantly, we show performance improvements when we custom fit subtree semantics to applications such as checkpoint-restart (91.7x speedup), user home directories (0.03 standard deviation from optimal), and users checking for partial results (2\% overhead).},
address = {Vancouver, BC, Canada},
author = {Michael A. Sevilla and Ivo Jimenez and Noah Watkins and Jeff LeFevre and Peter Alvaro and Shel Finkelstein and Patrick Donnelly and Carlos Maltzahn},
booktitle = {IPDPS 2018},
date-added = {2018-03-19 21:24:16 +0000},
date-modified = {2020-07-01 13:03:23 -0700},
keywords = {papers, metadata, datamanagement, programmable, filesystems, storage, systems, cross, nsf1450488, doeDE-SC0016074},
month = {May 21-25},
title = {{Cudele}: An {API} and Framework for Programmable Consistency and Durability in a Global Namespace},
year = {2018},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL1Mvc2V2aWxsYS1pcGRwczE4LnBkZk8RAWwAAAAAAWwAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xNzZXZpbGxhLWlwZHBzMTgucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17OPNgAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAABUwAAAgA4LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpTOnNldmlsbGEtaXBkcHMxOC5wZGYADgAoABMAcwBlAHYAaQBsAGwAYQAtAGkAcABkAHAAcwAxADgALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADZVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9TL3NldmlsbGEtaXBkcHMxOC5wZGYAEwABLwAAFQACABH//wAAAAgADQAaACQAVQAAAAAAAAIBAAAAAAAAAAUAAAAAAAAAAAAAAAAAAAHF}}
@inproceedings{ionkov-pdsw17,
abstract = {Scientific workflows contain an increasing number of interacting applications, often with big disparity between the formats of data being produced and consumed by different applications. This mismatch can result in performance degradation as data retrieval causes multiple read operations (often to a remote storage system) in order to convert the data. Although some parallel filesystems and middleware libraries attempt to identify access patterns and optimize data retrieval, they frequently fail if the patterns are complex.
The goal of ASGARD is to replace I/O operations issued to a file by the processes with a single operation that passes enough semantic information to the storage system, so it can combine (and eventually optimize) the data movement. ASGARD allows application developers to define their application's abstract dataset as well as the subsets of the data (fragments) that are created and used by the HPC codes. It uses the semantic information to generate and execute transformation rules that convert the data between the memory layouts of the producer and consumer applications, as well as the layout on nonvolatile storage. The transformation engine implements functionality similar to the scatter/gather support available in some file systems. Since data subsets are defined during the initialization phase, i.e., well in advance from the time they are used to store and retrieve data, the storage system has multiple opportunities to optimize both the data layout and the transformation rules in order to increase the overall I/O performance.
In order to evaluate ASGARD's performance, we added support for ASGARD's transformation rules to Ceph's object store RADOS. We created Ceph data objects that allow custom data striping based on ASGARD's fragment definitions. Our tests with the extended RADOS show up to 5 times performance improvements for writes and 10 times performance improvements for reads over collective MPI I/O.},
address = {Denver, CO},
author = {Latchesar Ionkov and Carlos Maltzahn and Michael Lang},
booktitle = {PDSW-DISCS 2017 at SC17},
date-added = {2017-11-07 16:45:07 +0000},
date-modified = {2020-01-04 21:39:53 -0700},
keywords = {papers, replication, layout, language},
month = {Nov 13},
title = {Optimized Scatter/Gather Data Operations for Parallel Storage},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAuLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9pb25rb3YtcGRzdzE3LnBkZk8RAWoAAAAAAWoAAgAADE1hY2ludG9zaCBIRAAAAAAAAAAAAAAAAAAAAODvc55CRAAB/////xFpb25rb3YtcGRzdzE3LnBkZgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAD/////17OCgwAAAAAAAAAAAAMABAAACiBjdQAAAAAAAAAAAAAAAAADSS1KAAACADgvOlVzZXJzOmNhcmxvc21hbHQ6TXkgRHJpdmU6UGFwZXJzOkktSjppb25rb3YtcGRzdzE3LnBkZgAOACQAEQBpAG8AbgBrAG8AdgAtAHAAZABzAHcAMQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA2VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSS1KL2lvbmtvdi1wZHN3MTcucGRmABMAAS8AABUAAgAR//8AAAAIAA0AGgAkAFUAAAAAAAACAQAAAAAAAAAFAAAAAAAAAAAAAAAAAAABww==},
bdsk-file-2 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxA1Li4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9pb25rb3YtcGRzdzE3LXNsaWRlcy5wZGZPEQGIAAAAAAGIAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8YaW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9bLVjQAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAA0ktSgAAAgA/LzpVc2VyczpjYXJsb3NtYWx0Ok15IERyaXZlOlBhcGVyczpJLUo6aW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAAOADIAGABpAG8AbgBrAG8AdgAtAHAAZABzAHcAMQA3AC0AcwBsAGkAZABlAHMALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASAD1Vc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovaW9ua292LXBkc3cxNy1zbGlkZXMucGRmAAATAAEvAAAVAAIAEf//AAAACAANABoAJABcAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAeg=}}
@article{hacker:bams17,
abstract = {Software containers can revolutionize research and education with numerical weather prediction models by easing use and guaranteeing reproducibility.},
author = {Joshua P. Hacker and John Exby and David Gill and Ivo Jimenez and Carlos Maltzahn and Timothy See and Gretchen Mullendore and Kathryn Fossell},
date-added = {2017-08-29 05:50:47 +0000},
date-modified = {2020-01-04 21:40:58 -0700},
journal = {Bulletin of the American Meteorological Society},
keywords = {papers, containers, nwp, learning},
pages = {1129--1138},
title = {A Containerized Mesoscale Model and Analysis Toolkit to Accelerate Classroom Learning, Collaborative Research, and Uncertainty Quantification},
volume = {98},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAsLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0gvaGFja2VyLWJhbXMxNy5wZGZPEQFkAAAAAAFkAAIAAAxNYWNpbnRvc2ggSEQAAAAAAAAAAAAAAAAAAADg73OeQkQAAf////8RaGFja2VyLWJhbXMxNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/////9XKT/kAAAAAAAAAAAADAAQAAAogY3UAAAAAAAAAAAAAAAAAAUgAAAIANi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SDpoYWNrZXItYmFtczE3LnBkZgAOACQAEQBoAGEAYwBrAGUAcgAtAGIAYQBtAHMAMQA3AC4AcABkAGYADwAaAAwATQBhAGMAaQBuAHQAbwBzAGgAIABIAEQAEgA0VXNlcnMvY2FybG9zbWFsdC9NeSBEcml2ZS9QYXBlcnMvSC9oYWNrZXItYmFtczE3LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABTAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAbs=}}
@inproceedings{jimenez:cnert17,
abstract = {This paper introduces PopperCI, a continuous integration (CI) service hosted at UC Santa Cruz that allows researchers to automate the end-to-end execution and validation of experiments. PopperCI assumes that experiments follow Popper, a convention for implementing experiments and writing articles following a DevOps approach that has been proposed recently. PopperCI runs experiments on public, private or government-funded cloud infrastructures in a fully automated way. We describe how PopperCI executes experiments and present a use case that illustrates the usefulness of the service.},
address = {Atlanta, GA},
author = {Ivo Jimenez and Andrea Arpaci-Dusseau and Remzi Arpaci-Dusseau and Jay Lofstead and Carlos Maltzahn and Kathryn Mohror and Robert Ricci},
booktitle = {Workshop on Computer and Networking Experimental Research Using Testbeds (CNERT'17) in conjunction with IEEE INFOCOM 2017},
date-added = {2017-07-31 03:37:33 +0000},
date-modified = {2020-01-04 21:41:20 -0700},
keywords = {papers, reproducibility, devops},
month = {May 1},
title = {{PopperCI}: Automated Reproducibility Validation},
year = {2017},
bdsk-file-1 = {YnBsaXN0MDDSAQIDBFxyZWxhdGl2ZVBhdGhZYWxpYXNEYXRhXxAwLi4vLi4vLi4vTXkgRHJpdmUvUGFwZXJzL0ktSi9qaW1lbmV6LWNuZXJ0MTcucGRmTxEBcgAAAAABcgACAAAMTWFjaW50b3NoIEhEAAAAAAAAAAAAAAAAAAAA4O9znkJEAAH/////E2ppbWVuZXotY25lcnQxNy5wZGYAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAP/////Vo/T7AAAAAAAAAAAAAwAEAAAKIGN1AAAAAAAAAAAAAAAAAANJLUoAAAIAOi86VXNlcnM6Y2FybG9zbWFsdDpNeSBEcml2ZTpQYXBlcnM6SS1KOmppbWVuZXotY25lcnQxNy5wZGYADgAoABMAagBpAG0AZQBuAGUAegAtAGMAbgBlAHIAdAAxADcALgBwAGQAZgAPABoADABNAGEAYwBpAG4AdABvAHMAaAAgAEgARAASADhVc2Vycy9jYXJsb3NtYWx0L015IERyaXZlL1BhcGVycy9JLUovamltZW5lei1jbmVydDE3LnBkZgATAAEvAAAVAAIAEf//AAAACAANABoAJABXAAAAAAAAAgEAAAAAAAAABQAAAAAAAAAAAAAAAAAAAc0=}}