diff --git a/doc/sphinx/10_microbenchmarks/M1_STREAM/STREAM.rst b/doc/sphinx/10_microbenchmarks/M1_STREAM/STREAM.rst index d3dab238..df20b3c6 100644 --- a/doc/sphinx/10_microbenchmarks/M1_STREAM/STREAM.rst +++ b/doc/sphinx/10_microbenchmarks/M1_STREAM/STREAM.rst @@ -37,7 +37,21 @@ These operations stress memory and floating point pipelines.They test memory tra Figure of Merit --------------- -The primary FOM is the Triad rate (MB/s). +The primary FOM is the max Triad rate (MB/s). + +Run Rules +--------- + +The program must synchronize between each operation. + +On a heterogeneous system, run STREAM for all computational devices. Where there is unified or heterogeneously addressable memory, also provide performance numbers for each device's access to available memory types. + + +For instance: +On a heterogeneous node architecture with a multi-core CPU with HBM2 memory and a GPU with HBM3 memory, STREAM performance should be reported for: CPU <-> HBM2, GPU <-> HBM3, CPU <-> HBM3, GPU <-> HBM2 + +On CPU, we want to see how bandwidth scales as a function of cores. On GPU, report the maximum bandwidth. 
+ Building ======== @@ -60,22 +74,30 @@ Example calculations for results presented here: STREAM ARRAY SIZE CALCULATIONS: -ARRAY_SIZE ~= 4 x (45 MiB cache / processor) x (2 processors) / (3 arrays) / (8 bytes / element) = 15 Mi elements = 15000000 +:: + + ARRAY_SIZE ~= 4 x (45 MiB cache / processor) x (2 processors) / (3 arrays) / (8 bytes / element) = 15 Mi elements = 15000000 + +:: + + HASWELL: Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz + CACHE: 40M + SOCKETS: 2 + 4 * ( 40M * 2 ) / 3 ARRAYS / 8 Bytes/element = 13.4 Mi elements = 13400000 + +:: -HASWELL: Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz -CACHE: 40M -SOCKETS: 2 -4 * ( 40M * 2 ) / 3 ARRAYS / 8 Bytes/element = 13.4 Mi elements = 13400000 + BROADWELL: Intel(R) Xeon(R) CPU E5-2695 v4 @ 2.10GHz + CACHE: 45M + SOCKETS: 2 + 4 * ( 45M * 2 ) / 3 ARRAYS / 8 BYTES/ELEMENT = 15.0 Mi elements = 15000000 -BROADWELL: Intel(R) Xeon(R) CPU E5-2695 v4 @ 2.10GHz -CACHE: 45M -SOCKETS: 2 -4 * ( 45M * 2 ) / 3 ARRAYS / 8 BYTES/ELEMENT = 15.0 Mi elements = 15000000 +:: -SAPPHIRE RAPIDS: Intel(R) Xeon(R) Platinum 8480+ -CACHE: 105 -SOCKETS: 2 -4 x (105M * 2 ) / 3 ARRAYS / 8 BYTES/ELEMENT = 35 Mi elements = 35000000 + SAPPHIRE RAPIDS: Intel(R) Xeon(R) Platinum 8480+ + CACHE: 105 + SOCKETS: 2 + 4 x (105M * 2 ) / 3 ARRAYS / 8 BYTES/ELEMENT = 35 Mi elements = 35000000 Running ======= diff --git a/doc/sphinx/10_microbenchmarks/M6_MDTEST/MDTEST.rst b/doc/sphinx/10_microbenchmarks/M6_MDTEST/MDTEST.rst index cf97adb4..ffc1477a 100644 --- a/doc/sphinx/10_microbenchmarks/M6_MDTEST/MDTEST.rst +++ b/doc/sphinx/10_microbenchmarks/M6_MDTEST/MDTEST.rst @@ -21,24 +21,12 @@ Figure of Merit Building ======== -RHEL Systems ------------- - -CrayOS Systems --------------- - Running ======= Input ----- -Independent Variables ---------------------- - -Dependent Variable(s) ---------------------- - Example Results =============== diff --git a/dgemm/README.ACES b/microbenchmarks/dgemm/README.ACES similarity index 100% rename from dgemm/README.ACES rename to 
microbenchmarks/dgemm/README.ACES diff --git a/dgemm/scripts/loop_dgemm b/microbenchmarks/dgemm/scripts/loop_dgemm similarity index 100% rename from dgemm/scripts/loop_dgemm rename to microbenchmarks/dgemm/scripts/loop_dgemm diff --git a/dgemm/src/Makefile b/microbenchmarks/dgemm/src/Makefile similarity index 100% rename from dgemm/src/Makefile rename to microbenchmarks/dgemm/src/Makefile diff --git a/dgemm/src/Makefile.intel b/microbenchmarks/dgemm/src/Makefile.intel similarity index 100% rename from dgemm/src/Makefile.intel rename to microbenchmarks/dgemm/src/Makefile.intel diff --git a/dgemm/src/mt-dgemm.c b/microbenchmarks/dgemm/src/mt-dgemm.c similarity index 100% rename from dgemm/src/mt-dgemm.c rename to microbenchmarks/dgemm/src/mt-dgemm.c diff --git a/microbenchmarks/mdtest/COPYRIGHT b/microbenchmarks/mdtest/COPYRIGHT new file mode 100644 index 00000000..ef8fc360 --- /dev/null +++ b/microbenchmarks/mdtest/COPYRIGHT @@ -0,0 +1,256 @@ +Copyright (c) 2003, The Regents of the University of California. +Produced at the Lawrence Livermore National Laboratory. +Written by Christopher Morrone , Bill Loewe , +and Tyce McLarty . +UCRL-CODE-155800 +All rights reserved. + +This file is part of mdtest. + +Please also read Our Notice and GNU General Public License. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License (as published by the Free Software +Foundation) version 2, dated June 1991. + +This program is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the IMPLIED WARRANTY OF MERCHANTABILITY or FITNESS FOR A +PARTICULAR PURPOSE. See the terms and conditions of the GNU General Public +License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + + +OUR NOTICE AND TERMS AND CONDITIONS OF THE GNU GENERAL PUBLIC LICENSE + +Our Preamble Notice + +A. This notice is required to be provided under our contract with the U.S. +Department of Energy (DOE). This work was produced at the University of +California, Lawrence Livermore National Laboratory under Contract No. +W-7405-ENG-48 with the DOE. + +B. Neither the United States Government nor the University of California nor +any of their employees, makes any warranty, express or implied, or assumes any +liability or responsibility for the accuracy, completeness, or usefulness of +any information, apparatus, product, or process disclosed, or represents that +its use would not infringe privately-owned rights. + +C. Also, reference herein to any specific commercial products, process, or +services by trade name, trademark, manufacturer or otherwise does not +necessarily constitute or imply its endorsement, recommendation, or favoring +by the United States Government or the University of California. The views and +opinions of authors expressed herein do not necessarily state or reflect those +of the United States Government or the University of California, and shall not +be used for advertising or product endorsement purposes. + +The precise terms and conditions for copying, distribution and modification +follows. + +GNU Terms and Conditions for Copying, Distribution, and Modification + +0. This License applies to any program or other work which contains a notice +placed by the copyright holder saying it may be distributed under the terms of +this General Public License. 
The "Program," below, refers to any such program +or work, and a "work based on the Program" means either the Program or any +derivative work under copyright law: that is to say, a work containing the +Program or a portion of it, either verbatim or with modifications and/or +translated into another language. (Hereinafter, translation is included +without limitation in the term "modification".) Each licensee is addressed as +"you." + +Activities other than copying, distribution and modification are not covered by +this License; they are outside its scope. The act of running the Program is +not restricted, and the output from the Program is covered only if its contents +constitute a work based on the Program (independent of having been made by +running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as +you receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice and +disclaimer of warranty; keep intact all the notices that refer to this License +and to the absence of any warranty; and give any other recipients of the +Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may +at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, +thus forming a work based on the Program, and copy and distribute such +modifications or work under the terms of Section 1 above, provided that you +also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. 
+ + b) You must cause any work that you distribute or publish, that in whole + or in part contains or is derived from the Program or any part thereof, + to be licensed as a whole at no charge to all third parties under the terms + of this License. + + c) If the modified program normally reads commands interactively when run, + you must cause it, when started running for such interactive use in the + most ordinary way, to print or display an announcement including an + appropriate copyright notice and a notice that there is no warranty (or + else, saying that you provide a warranty) and that users may redistribute + the program under these conditions, and telling the user how to view a copy + of this License. (Exception: if the Program itself is interactive but does + not normally print such an announcement, your work based on the Program is + not required to print an announcement.) + +These requirements apply to the modified work as a whole. If identifiable +sections of that work are not derived from the Program, and can be reasonably +considered independent and separate works in themselves, then this License, and +its terms, do not apply to those sections when you distribute them as separate +work. But when you distribute the same section as part of a whole which is a +work based on the Program, the distribution of the whole must be on the terms +of this License, whose permissions for other licensees extend to the entire +whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your +rights to work written entirely by you; rather, the intent is to exercise the +right to control the distribution of derivative or collective works based on +the Program. 
+ +In addition, mere aggregation of another work not based on the Program with the +Program (or with a work based on the Program) on a volume of a storage or +distribution medium does not bring the other work under the scope of this +License. + +3. You may copy and distribute the Program (or a work based on it, under +Section 2) in object code or executable form under the terms of Sections 1 and +2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 above + on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three years, to + give any third party, for a charge no more than your cost of physically + performing source distribution, a complete machine-readable copy of the + corresponding source code, to be distributed under the terms of Sections 1 + and 2 above on a medium customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed only + for noncommercial distribution and only if you received the program in + object code or executable form with such an offer, in accord with + Subsection b above.) + +The source code for a work means the preferred form the work for making +modifications to it. For an executable work, complete source code means all +the source code for all modules it contains, plus any associated interface +definition files, plus the scripts used to control compilation and installation +of the executable. 
However, as a special exception, the source code +distributed need not include anything that is normally distributed (in either +source or binary form) with the major components (compiler, kernel, and so on) +of the operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the source +code from the same place counts as distribution of the source code, even though +third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as +expressly provided under this License. Any attempt otherwise to copy, modify, +sublicense or distribute the Program is void, and will automatically terminate +your rights under this License. However, parties who have received copies, or +rights, from you under this License will not have their licenses terminated so +long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. +However, nothing else grants you permission to modify or distribute the Program +or its derivative works. These actions are prohibited by law if you do not +accept this License. Therefore, by modifying or distributing the Program (or +any work based on the Program), you indicate your acceptance of this License to +do so, and all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), +the recipient automatically receives a license from the original licensor to +copy, distribute or modify the Program subject to these terms and conditions. +You may not impose any further restrictions on the recipients' exercise of the +rights granted herein. 
You are not responsible for enforcing compliance by +third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), conditions +are imposed on you (whether by court order, agreement or otherwise) that +contradict the conditions of this License, they do not excuse you from the +conditions of this License. If you cannot distribute so as to satisfy +simultaneously your obligations under this License and any other pertinent +obligations, then as a consequence you may not distribute the Program at all. +For example, if a patent license would not permit royalty-free redistribution +of the Program by all those who receive copies directly or indirectly through +you, then the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply and +the section as a whole is intended to apply in other circumstances. + +It is not the purpose to this section to induce you to infringe any patents or +other property right claims or to contest validity of any such claims; this +section has the sole purpose of protecting the integrity of the free software +distribution system, which is implemented by public license practices. Many +people have made generous contributions to the wide range of software +distributed through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing to +distribute software through any other system and a licensee cannot impose that +choice. + +This section is intended to make thoroughly clear what is believed to be a +consequence of the rest of this License. + +8. 
If the distribution and/or use of the Program is restricted in certain +countries either by patents or by copyrighted interfaces, the original +copyright holder who places the Program under this License may add an explicit +geographical distribution limitation excluding those countries, so that +distribution is permitted only in or among countries not thus excluded. In +such case, this License incorporates the limitation as if written in the body +of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the +General Public License from time to time. Such new versions will be similar in +spirit to the present version, but may differ in detail to address new problems +or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any later +version," you have the option of following the terms and conditions either of +that version of any later version published by the Free Software Foundation. +If the Program does not specify a version number of this License, you may +choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs +whose distribution conditions are different, write to the author to ask for +permission. For software which is copyrighted by the Free Software Foundation, +write to the Free Software Foundation; we sometimes make exceptions for this. +Our decision to grant permission will be guided by the two goals of preserving +the free status of all derivatives of our free software and or promoting the +sharing and reuse of software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR +THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE +STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE +PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND +PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, +YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL +ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE +PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR +INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA +BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER +OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS diff --git a/microbenchmarks/mdtest/Makefile b/microbenchmarks/mdtest/Makefile new file mode 100644 index 00000000..31514cf1 --- /dev/null +++ b/microbenchmarks/mdtest/Makefile @@ -0,0 +1,40 @@ +#/*****************************************************************************\ +#* * +#* Copyright (c) 2003, The Regents of the University of California * +#* See the file COPYRIGHT for a complete copyright notice and license. * +#* * +#******************************************************************************* +#* +#* CVS info: +#* $RCSfile: Makefile,v $ +#* $Revision: 1.1.1.1.2.1 $ +#* $Date: 2010/05/11 21:25:16 $ +#* $Author: loewe6 $ +#* +#* Purpose: +#* Make mdtest executable. 
+#* +#* make [mdtest] -- mdtest +#* make clean -- remove executable +#* +#\*****************************************************************************/ + +CC.AIX = mpcc_r -bmaxdata:0x80000000 +CC.Linux = cc -Wall +CC.Darwin = mpicc -Wall + +# Requires GNU Make +OS=$(shell uname) + +# Flags for compiling on 64-bit machines +LARGE_FILE = -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE=1 -D__USE_LARGEFILE64=1 + +CC = $(CC.$(OS)) + +all: mdtest + +mdtest: mdtest.c + $(CC) -D$(OS) $(LARGE_FILE) -g -o mdtest mdtest.c -lm + +clean: + rm -f mdtest mdtest.o diff --git a/microbenchmarks/mdtest/Makefile.XROADS b/microbenchmarks/mdtest/Makefile.XROADS new file mode 100644 index 00000000..0a94dcbe --- /dev/null +++ b/microbenchmarks/mdtest/Makefile.XROADS @@ -0,0 +1,12 @@ +# +# You may wish to specify CC, CFLAGS, and/or LDFLAGS when compiling, e.g., +# +# make CC=mpicc CFLAGS=-g +# + +.PHONY: clean + +mdtest: mdtest.o + +clean: + @rm mdtest *.o diff --git a/microbenchmarks/mdtest/README b/microbenchmarks/mdtest/README new file mode 100644 index 00000000..c5d1299d --- /dev/null +++ b/microbenchmarks/mdtest/README @@ -0,0 +1,136 @@ +/******************************************************************************\ +* * +* Copyright (c) 2003, The Regents of the University of California * +* See the file COPYRIGHT for a complete copyright notice and license. 
* +* * +\******************************************************************************/ + +Usage: mdtest [-b #] [-B] [-c] [-C] [-d testdir] [-D] [-e] [-E] [-f first] [-F] + [-h] [-i iterations] [-I #] [-l last] [-L] [-n #] [-N #] [-p seconds] + [-r] [-R[#]] [-s #] [-S] [-t] [-T] [-u] [-v] [-V #] [-w #] [-y] + [-z #] + + -b: branching factor of hierarchical directory structure + -B: no barriers between phases (create/stat/remove) + -c: collective creates: task 0 does all creates and deletes + -C: only create files/dirs + -d: the directory in which the tests will run + -D: perform test on directories only (no files) + -e: number of bytes to read from each file + -E: only read files + -f: first number of tasks on which the test will run + -F: perform test on files only (no directories) + -h: prints help message + -i: number of iterations the test will run + -I: number of items per tree node + -l: last number of tasks on which the test will run + -L: files/dirs created only at leaf level + -n: every task will create/stat/remove # files/dirs per tree + -N: stride # between neighbor tasks for file/dir stat (local=0) + -p: pre-iteration delay (in seconds) + -r: only remove files/dirs + -R: randomly stat files/dirs (optional seed can be provided) + -s: stride between the number of tasks for each test + -S: shared file access (file only, no directories) + -t: time unique working directory overhead + -T: only stat files/dirs + -u: unique working directory for each task + -v: verbosity (each instance of option increments by one) + -V: verbosity value + -w: number of bytes to write to each file + -y: sync file after write completion + -z: depth of hierarchical directory structure + +NOTES: + * -N allows a "read-your-neighbor" approach by setting stride to + tasks-per-node + * -d allows multiple paths for the form '-d fullpath1@fullpath2@fullpath3' + * -B allows each task to time itself. The aggregate results reflect this + change. + * -n and -I cannot be used together. 
-I specifies the number of files/dirs + created per tree node, whereas the -n specifies the total number of + files/dirs created over an entire tree. When using -n, integer division is + used to determine the number of files/dirs per tree node. (E.g. if -n is + 10 and there are 4 tree nodes (z=1 and b=3), there will be 2 files/dirs per + tree node.) + * -R and -T can be used separately. -R merely indicates that if files/dirs + are going to be stat'ed, then they will be stat'ed randomly. + + +Illustration of terminology: + + Hierarchical directory structure (tree) + + ======= + | | (tree node) + ======= + / | \ + ------ | ------ + / | \ + ======= ======= ======= + | | | | | | (leaf level) + ======= ======= ======= + + In this example, the tree has a depth of one (z=1) and branching factor of + three (b=3). The node at the top of the tree is the root node. The level + of nodes furthest from the root is the leaf level. All trees created by + mdtest are balanced. + + To see how mdtest operates, do a simple run like the following: + + mdtest -z 1 -b 3 -I 10 -C -i 3 + + This command will create a tree like the one above, then each task will + create 10 files/dirs per tree node. Three of these trees will be created + (one for each iteration). + + +Example usages: + +mdtest -I 10 -z 5 -b 2 + + A directory tree is created in the current working directory that has a + depth of 5 and a branching factor of 2. Each task operates on 10 + files/dirs in each tree node. + +mdtest -I 10 -z 5 -b 2 -R + + This example is the same as the previous one except that the files/dirs are + stat'ed randomly. + +mdtest -I 10 -z 5 -b 2 -R4 + + Again, this example is the same as the previous except a seed of 4 is + passed to the random number generator. + +mdtest -I 10 -z 5 -b 2 -L + + A directory tree is created as described above, but in this example + files/dirs exist only at the leaf level of the tree. 
+ +mdtest -n 100 -i 3 -d /users/me/testing + + Each task creates 100 files/dirs in a root node (there are no branches + out of the root node) within the path /users/me/testing. This is done + three times. Aggregate values are calculated over the iterations. + +mdtest -n 100 -F -C + + Each task only creates 100 files in the current directory. + Directories are not created. The files are neither stat'ed nor + removed. + +mdtest -I 5 -z 3 -b 5 -u -d /users/me/testing + + Each task creates a directory tree in the /users/me/testing + directory. Each tree has a depth of 3 and a branching factor of + 5. Five files/dirs are operated upon in each node of each tree. + +mdtest -I 5 -z 3 -b 5 -u -d /users/me/testing@/some/other/location + + This run is the same as the previous except that each task creates + its tree in a different directory. Task 0 will create a tree in + /users/me/testing. Task 1 will create a tree in /some/other/location. + After all of the directories are used, the remaining tasks round- + robin over the directories supplied. (I.e. Task 2 will create a + tree in /users/me/testing, etc.) diff --git a/microbenchmarks/mdtest/README.XROADS b/microbenchmarks/mdtest/README.XROADS new file mode 100644 index 00000000..2783b547 --- /dev/null +++ b/microbenchmarks/mdtest/README.XROADS @@ -0,0 +1,171 @@ +Crossroads/NERSC-9 mdtest Benchmark +================================================================================ + +I. Benchmark Description +-------------------------------------------------------------------------------- +mdtest is designed to measure the performance of various metadata operations and +uses MPI to coordinate the operations and to collect the results. All of the +general run rules for XRoads benchmarking apply. + + +II. Build Instructions +-------------------------------------------------------------------------------- +MPI is required in order to build and run the code. 
The source code used for +this benchmark is derived from mdtest 1.8.4 and it is included with this +benchmark specification. More information about mdtest is available on +http://mdtest.sourceforge.net. + +After extracting the tar file, ensure that the MPI compiler wrappers (e.g., +`mpicc`) are in `$PATH` and then + + cd mdtest-1.8.4-xroads + make + +This will build the mdtest executable, called `mdtest`. It may be necessary to +specify the `CC`, `CFLAGS`, and `LDFLAGS` variables to ensure correct +compilation of `mdtest`. A simplified Makefile, `Makefile.XROADS`, is also +provided to this end, e.g., + + make -f Makefile.XROADS CC=mpicc CFLAGS=-g + +Either `make` or `make -f Makefile.XROADS` can be used to build the binary used +for this benchmark, but any additional `CFLAGS` or `LDFLAGS` required for +compilation must be reported with the benchmark results. + + +III. Run Rules +-------------------------------------------------------------------------------- +The intent of this benchmark is to measure the performance of file metadata +operations on the platform storage. + +Observed benchmark performance shall be obtained from a storage system +configured as closely as possible to the proposed platform storage. If the +proposed solution includes multiple file access protocols (e.g., pNFS and NFS) +or multiple tiers accessible by applications, benchmark results for mdtest +shall be provided for each protocol and/or tier. + +Performance projections are permissible if they are derived from a similar +system that is considered an earlier generation of the proposed system. + +### Required Runs + +This benchmark is intended to measure the capability of the storage subsystem +to create and delete files, and it contains features that minimize +caching/buffering effects. As such, the Offerer should not utilize +optimizations that cache/buffer file metadata or metadata operations in compute +node memory. 
+ +The Offeror shall run the following tests: + +* creating, statting, and removing at least 1,048,576 files in a single + directory +* creating, statting, and removing at least 1,048,576 files in separate + directories (one directory per MPI process) +* creating, statting, and removing one file by multiple MPI processes + +Each of these tests must be run at the following levels of concurrency: + +1. a single MPI process +2. the optimal number of MPI processes on a single compute node +3. the minimal number of MPI processes on multiple compute nodes that achieves + the peak results for the proposed system +4. the maximum possible MPI-level concurrency on the proposed system. This + could mean + * using one MPI process per CPU core across the entire system + * using the maximum number of MPI processes possible if one MPI process per + core will not be possible on the proposed architecture + * using more than 1,048,576 files if the system is capable of launching + more than 1,048,576 MPI processes + +These tests are configured via command-line arguments, and the following +section provides guidance on passing the correct options to `mdtest` for each +test. + +### Running mdtest + +mdtest is executed as any other standard MPI application would be on the +proposed system (e.g., with `mpirun` or `srun`). For the sake of the +following examples, `mpirun` is used. + +**To run create, stat, and delete tests on files in a shared directory**, an +appropriate `mdtest` command-line invocation may look like + + mpirun -np 64 ./mdtest -F -C -T -r -n 16384 -d /scratch -N 16 + +The following command-line flags MUST be changed: + +* `-n` - the number of files **each MPI process** should manipulate. For a + test run with 64 MPI processes, specifying `-n 16384` will produce the + required 1048576 files (64 MPI processes x 16384). This parameter must + be changed for each level of concurrency. +* `-d /scratch` - the directory in which this test should be run. 
**This + must be an absolute path.** +* `-N` - MPI rank offset for each separate phase of the test. This parameter + must be equal to the number of MPI processes per node in use (e.g., `-N 16` + for a test with 16 processes per node) to ensure that each test phase (read, + stat, and delete) is performed on a different node. + +The following command-line flags MUST NOT be changed or omitted: + +* `-F` - only operate on files, not directories +* `-C` - perform file creation test +* `-T` - perform file stat test +* `-r` - perform file remove test + +**To have each MPI process write files into a unique directory,** add the `-u` +option: + + mpirun -np 64 ./mdtest -F -C -T -r -n 16384 -d /scratch -N 16 -u + +**To create, stat, and remove one file by multiple MPI processes,** add the `-S` +option: + + mpirun -np 64 ./mdtest -F -C -T -r -n 16384 -d /scratch -N 16 -S + + +IV. Permitted Modifications +-------------------------------------------------------------------------------- + +Modifications to the benchmark application code are only permissible to enable +correct compilation and execution on the target platform. Any modifications +must be fully documented (e.g., as a diff or patch file) and reported with the +benchmark results. + + +V. Reporting Results +-------------------------------------------------------------------------------- + +mdtest will execute file creation, file statting, and file deletion tests for +each run. The rate of file creating/statting/deleting are reported to stdout +at the conclusion of each test, and the following rates should be reported: + +* `File creation` +* `File stat` +* `File removal` + +The maximum values for these rates must be reported for all tests. Reporting +the maximum creation rates from one run and the maximum deletion rates from a +different run is NOT valid. 
File creation rate has slightly higher importance +for this test, so if the highest observed file creation rate came from a +different run than the highest observed deletion rate, report the results from +the run with the highest file creation rate. + +### Benchmark Platform Description + +The Offeror must provide a comprehensive description of the environment in which +each benchmark was run. This must include: + +* Client and server system configurations, including node and processor counts, + processor models, memory size and speed, and OS (names and versions) +* Storage media and their configurations used for each tier of the storage + subsystem +* Network fabric used to connect servers, clients, and storage, including + network configuration settings and topology +* Client and server configuration settings including + * Client and server sysctl settings + * Driver options + * Network interface options + * File system configuration and mount options +* Compiler name and version, compiler options, and libraries used to build + benchmarks + diff --git a/microbenchmarks/mdtest/RELEASE_LOG b/microbenchmarks/mdtest/RELEASE_LOG new file mode 100644 index 00000000..d2e9abbf --- /dev/null +++ b/microbenchmarks/mdtest/RELEASE_LOG @@ -0,0 +1,103 @@ +Changes in mdtest-1.8.4 + * Added read option to extend create (write) capability. New feature will: + -E: Only perform the read phase of the tests. + -e #: Set the number of Bytes to read from each file. + +Fixes in mdtest-1.8.3 + * Prepared for release on sourceforge.net + +Fixes in mdtest-1.8.2 + * With the new changes issued in mdtest-1.8.0, all files and directories + were operated upon by using the full path to each file/dir. Full paths + are no longer used. Now a relative path is used from the root dir of + each directory tree. + * fixed bug in collective creates and unique directory per task mode + +Fixes in mdtest-1.8.1 + * A new test directory is created for each iteration. 
Then for each + iteration the directory structure is created/removed. This allowed + multiple iterations of the create-only mode. The name of the test + directories has changed as a result of this fix. Also, aggregate + creation/removal times are computed now over the number of iterations. + +Changes in mdtest-1.8.0 + * added option to create files/dirs in tree-like directory structure: + Previously, all files/dirs were created in one test directory. Now the + root directories of the tree(s) are created in that test directory. + Files/dirs are then created within those root directories or their children. + If the -u flag is specified, then unique trees are created per proc. + Otherwise, one tree is created. This coincides with the previous + functionality. The following flags were added/changed to incorporate this + new feature: + -z #: Indicates the depth of the leaves of the tree. If this flag is not + specified, the depth defaults to 0 (i.e. files/dirs are created in + the top-level directories). + -b #: Indicates the branching factor of the tree. If this flag is not + specified, the branching factor defaults to 1. Branching factor + indicates the number of children that each non-leaf node has. + -L: Indicates that files/dirs should only be created at the leaf level + of the tree. + -I #: Indicates the number of files/dirs that should be created within + each directory of the tree. + -n #: This flag still indicates the total number of files/dirs that should + be created. However, with the new tree structure some calculations + are done to determine the number of files that should be created per + directory in the tree. Due to rounding the actual total number of + files may differ slightly from what is specified. + + * added option to choose which phases to run: + The create, stat, and remove phases of mdtest have been separated. There + are flags now that allow the user to choose which phases they want to + perform. 
If none of these flags is specified, then the default usage is + to do all of the phases. The user is trusted to be intelligent about their + choice of phases. As a result of the separation of the phases, the naming + convention of the files/dirs had to be altered slightly. + + * added option to not barrier between each phase (create/stat/remove): + A major change in mdtest is the ability to time each proc that is running + the different phases of mdtest. The default functionality is the same as + the previous version - barriers are taken between phases (create/stat/ + remove). Also, in the default case, the resultant times reflect the + slowest rates for each phase. If the -B flag is specified, then no barriers + are taken between the phases. There is a race condition when specifying + this flag, but it is rarely met. The race condition is that one proc might + be trying to remove a file in the shared file case before someone else has + a chance to stat the file. Also, when the -B flag is specified, the + resultant rates are aggregates over the number of iterations and the number + of procs used. The default case, as mentioned above, calculates aggregates + only over the number of iterations where the time for each phase of an + iteration is the time of the slowest proc for that particular phase. + + * added option to stat files/dirs in a random order: + The default usage of mdtest will stat files in sequential order. Now, + however, items can be stat'ed in a random order by specifying the -R flag. + Even though the stat order is random with this usage, items are still only + stat'ed once each. This is achieved by randomly sorting a list of unique + item IDs before running the different tests. A seed for the random number + generator can optionally be provided with the following syntax: -R#. 
+ +Fixes in mdtest-1.7.5 + * changed bug in how test directory was created (race condition) + * added multipath option for test directories ('-d path1@path2@path3') + * added man page and correct malloc error-checking (patches from Jim Garlick) + +Fixes in mdtest-1.7.4: + * folded b_remove_0 branch into main HEAD branch + +Fixes in mdtest-b_remove_0: + * added remove option to only remove files from previous run + +Fixes in mdtest-pre_b_remove_0: + * simple clean up for preparing for branch + +Fixes in mdtest-1.7.3: + * added statfs() to get file system data block and inode usage, replacing + system() call + +Fixes in mdtest-1.7.2: + * initialized declared variables + * modified df disk usage call + * added error-checking for chdir() + +Fixes in mdtest-1.7.1: + * added '-y' option to sync file after write diff --git a/microbenchmarks/mdtest/mdtest.1 b/microbenchmarks/mdtest/mdtest.1 new file mode 100644 index 00000000..ba82d88a --- /dev/null +++ b/microbenchmarks/mdtest/mdtest.1 @@ -0,0 +1,188 @@ +.TH mdtest 1 "2010-05-05" "mdtest-1.8.3" "mdtest" +.SH NAME +mdtest \- test file system metadata performance +.SH SYNOPSIS +.B mdtest +.I "[-options]" +.SH DESCRIPTION +.B mdtest +is a file system metadata performance test designed to run +in a cluster MPI environment against parallel file systems. +.PP +In each iteration of the test, each MPI task creates, stats, and removes +the specified number of directories and/or files and measures the performance +in ops/second. After all the iterations complete, the maximum, minimum, +mean ops/sec and the std. deviation are reported for each operation. +.SH OPTIONS +.TP +.I "-b" branching_factor +The branching factor of the hierarchical directory structure [default: 1]. +.TP +.I "-B" +No barriers will be taken between the phases (create/stat/remove) of the tests. +.TP +.I "-c" +Use ``collective creates'', meaning task 0 does all the creates. +.TP +.I "-C" +Only perform the create phase of the tests. 
+.TP +.I "-d" testdir[@testdir2] +The directory in which the tests will run. For multiple pathes, must use fully-qualified pathnames. +[default: working directory of mdtest]. +.TP +.I "-D" +Perform test on directories only (no files). +.TP +.I "-e" bytes +Set the number of Bytes to read from each file [default: 0]. +.TP +.I "-E" +Only perform the read phase of the tests. +.TP +.I "-f" first +The first number of tasks on which the test will run +[default: 0]. +.TP +.I "-F" +Perform test on files only (no directories). +.TP +.I "-h" +Display help message. +.TP +.I "-i" iterations +The number of iterations the test will run +[default: 1]. +.TP +.I "-I" items_per_directory +The number of items per directory in the tree [default: 0]. +.TP +.I "-l" last +The last number of tasks on which the test will run +[default: 0]. +.TP +.I "-L" +Files/directories only created at the leaf level of the tree. +.TP +.I "-n" number_of_items +Every process will creat/stat/remove # directories and files +[default: 0]. +.TP +.I "-N" stride +Stride # between neighbor tasks for file/dir stat, 0 = local +[default: 0]. +.TP +.I "-p" seconds +Pre-iteration delay (in seconds). +.TP +.I "-r" +Only perform the remove phase of the tests. +.TP +.I "-R[seed]" +Randomly stat files. There is an optional argument that provides a seed +to the random number generator. (Note: There is no space between the +.I "-R" + and +the seed if one is provided.) +.TP +.I "-s" stride +Stride between the number of tasks for each test +[default: 1]. +.TP +.I "-S" +Shared file access (file only, no directories). +.TP +.I "-t" +Include unique working directory management overhead in the results +(presumes +.I "-u" +option). +.TP +.I "-T" +Only perform the stat phase of the tests. +.TP +.I "-u" +Create a unique working directory for each task +(presumes +.I "-d" +option). +.TP +.I "-v" +Increase verbosity (each instance of option increments by one). +.TP +.I "-V" value +Set verbosity value +[default: 0]. 
+.TP +.I "-w" bytes +Set the number of Bytes to write to each file after it is created +[default: 0]. +.TP +.I "-z" tree_depth +The depth of the hierarchical directory tree [default: 0]. +.SH EXAMPLES +.SS "Example 1" +.nf +$ mpirun -n 2 ./mdtest -d /tmp/z -n 100 -i 2 + +-- started at 11/23/2009 09:05:29 -- + +mdtest-1.8.1 was launched with 2 total task(s) on 1 nodes +Command line used: ./mdtest -d /tmp/z -n 100 -i 2 +Path: /tmp +FS: 28.8 GiB Used FS: 8.6% 8.6%Inodes: 1.8 Mi Used Inodes: 5.1% + +time to create tree: 0.000078 sec +tree creation rate: 12826.617737 ops/sec + +2 tasks, 200 files/directories + +SUMMARY: (of 2 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation: 21489.415 17447.551 19468.483 2020.932 + Directory stat : 154657.227 28731.061 91694.144 62963.083 + Directory removal : 146756.613 21489.415 84123.014 62633.599 + File creation : 42024.989 28731.061 35378.025 6646.964 + File stat : 146756.613 17447.551 82102.082 64654.531 + File removal : 156884.384 42024.989 99454.686 57429.698 + +time to remove tree: 0.001031 sec +tree removal rate: 970.005550 ops/sec + +-- finished at 11/23/2009 09:05:29 -- +.fi +.SS "Example 2" +.nf +$ mpirun -np 2 -H pc6 ./mdtest -d /tmp/z -b 2 -z 3 -I 10 + +-- started at 11/23/2009 09:09:23 -- + +mdtest-1.8.1 was launched with 2 total task(s) on 1 nodes +Command line used: ./mdtest -d /tmp/z -b 2 -z 3 -I 10 +Path: /tmp +FS: 28.8 GiB Used FS: 8.6% 8.6%Inodes: 1.8 Mi Used Inodes: 5.1% + +time to create tree: 0.000765 sec +tree creation rate: 19605.659084 ops/sec + +2 tasks, 300 files/directories + +SUMMARY: (of 1 iterations) + Operation Max Min Mean Std Dev + --------- --- --- ---- ------- + Directory creation: 29365.707 29365.707 29365.707 0.000 + Directory stat : 123701.455 123701.455 123701.455 0.000 + Directory removal : 25623.459 25623.459 25623.459 0.000 + File creation : 38704.743 38704.743 38704.743 0.000 + File stat : 125477.782 125477.782 125477.782 0.000 + File 
removal : 51911.845 51911.845 51911.845 0.000 + +time to remove tree: 0.000940 sec +tree removal rate: 15960.060883 ops/sec + +-- finished at 11/23/2009 09:09:23 -- +.fi + +.SH "SEE ALSO" +\fBhttp://sourceforge.net/projects/mdtest\fR diff --git a/microbenchmarks/mdtest/mdtest.c b/microbenchmarks/mdtest/mdtest.c new file mode 100644 index 00000000..da2d2605 --- /dev/null +++ b/microbenchmarks/mdtest/mdtest.c @@ -0,0 +1,1942 @@ +/* + * Copyright (C) 2003, The Regents of the University of California. + * Produced at the Lawrence Livermore National Laboratory. + * Written by Christopher J. Morrone , + * Bill Loewe , Tyce McLarty , + * and Ryan Kroiss . + * All rights reserved. + * UCRL-CODE-155800 + * + * Please read the COPYRIGHT file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License (as published by + * the Free Software Foundation) version 2, dated June 1991. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the IMPLIED WARRANTY OF + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * terms and conditions of the GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * CVS info: + * $RCSfile: mdtest.c,v $ + * $Revision: 1.1.1.1.2.1 $ + * $Date: 2010/05/11 21:25:16 $ + * $Author: loewe6 $ + */ + +#include "mpi.h" +#include +#include +#include +#include +#include +#ifdef __APPLE__ +#include +#include +#else +#include +#endif +#include +#include +#include +#include +#include +#include +#include + +#define FILEMODE S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH +#define DIRMODE S_IRUSR|S_IWUSR|S_IXUSR|S_IRGRP|S_IWGRP|S_IXGRP|S_IROTH|S_IXOTH +#define MAX_LEN 1024 +#define RELEASE_VERS "1.8.3" +#define TEST_DIR "#test-dir" +#define ITEM_COUNT 25000 + +typedef struct +{ + double entry[10]; +} table_t; + +int rank; +int size; +int* rand_array; +char testdir[MAX_LEN]; +char testdirpath[MAX_LEN]; +char top_dir[MAX_LEN]; +char base_tree_name[MAX_LEN]; +char ** filenames = NULL; +char hostname[MAX_LEN]; +char unique_dir[MAX_LEN]; +char mk_name[MAX_LEN]; +char stat_name[MAX_LEN]; +char read_name[MAX_LEN]; +char rm_name[MAX_LEN]; +char unique_mk_dir[MAX_LEN]; +char unique_chdir_dir[MAX_LEN]; +char unique_stat_dir[MAX_LEN]; +char unique_read_dir[MAX_LEN]; +char unique_rm_dir[MAX_LEN]; +char unique_rm_uni_dir[MAX_LEN]; +char * write_buffer = NULL; +char * read_buffer = NULL; +int barriers = 1; +int create_only = 0; +int stat_only = 0; +int read_only = 0; +int remove_only = 0; +int leaf_only = 0; +int branch_factor = 1; +int depth = 0; +int num_dirs_in_tree = 0; +int items_per_dir = 0; +int random_seed = 0; +int shared_file = 0; +int files_only = 0; +int dirs_only = 0; +int pre_delay = 0; +int unique_dir_per_task = 0; +int time_unique_dir_overhead = 0; +int verbose = 0; +int throttle = 1; +int items = 0; +int collective_creates = 0; +int write_bytes = 0; +int read_bytes = 0; +int sync_file = 0; +int path_count = 0; +int nstride = 0; /* 
neighbor stride */ +MPI_Comm testcomm; +table_t * summary_table; + +/* for making/removing unique directory && stating/deleting subdirectory */ +enum {MK_UNI_DIR, STAT_SUB_DIR, READ_SUB_DIR, RM_SUB_DIR, RM_UNI_DIR}; + +#ifdef __linux__ +#define FAIL(msg) do { \ + fprintf(stdout, "%s: Process %d(%s): FAILED in %s, %s: %s\n",\ + timestamp(), rank, hostname, __func__, \ + msg, strerror(errno)); \ + fflush(stdout);\ + MPI_Abort(MPI_COMM_WORLD, 1); \ +} while(0) +#else +#define FAIL(msg) do { \ + fprintf(stdout, "%s: Process %d(%s): FAILED at %d, %s: %s\n",\ + timestamp(), rank, hostname, __LINE__, \ + msg, strerror(errno)); \ + fflush(stdout);\ + MPI_Abort(MPI_COMM_WORLD, 1); \ +} while(0) +#endif + +char *timestamp() { + static char datestring[80]; + time_t timestamp; + + fflush(stdout); + timestamp = time(NULL); + strftime(datestring, 80, "%m/%d/%Y %T", localtime(×tamp)); + + return datestring; +} + +int count_tasks_per_node(void) { + char localhost[MAX_LEN], + hostname[MAX_LEN]; + int count = 1, + i; + MPI_Status status; + + if (gethostname(localhost, MAX_LEN) != 0) { + FAIL("gethostname()"); + } + if (rank == 0) { + /* MPI_receive all hostnames, and compare to local hostname */ + for (i = 0; i < size-1; i++) { + MPI_Recv(hostname, MAX_LEN, MPI_CHAR, MPI_ANY_SOURCE, + MPI_ANY_TAG, MPI_COMM_WORLD, &status); + if (strcmp(hostname, localhost) == 0) { + count++; + } + } + } else { + /* MPI_send hostname to root node */ + MPI_Send(localhost, MAX_LEN, MPI_CHAR, 0, 0, MPI_COMM_WORLD); + } + MPI_Bcast(&count, 1, MPI_INT, 0, MPI_COMM_WORLD); + + return(count); +} + +void delay_secs(int delay) { + if (rank == 0 && delay > 0) { + if (verbose >= 1) { + fprintf(stdout, "delaying %d seconds . . 
.\n", delay); + fflush(stdout); + } + sleep(delay); + } + MPI_Barrier(testcomm); +} + +void offset_timers(double * t, int tcount) { + double toffset; + int i; + + toffset = MPI_Wtime() - t[tcount]; + for (i = 0; i < tcount+1; i++) { + t[i] += toffset; + } +} + +void parse_dirpath(char *dirpath_arg) { + char * tmp, * token; + char delimiter_string[3] = { '@', '\n', '\0' }; + int i = 0; + + tmp = dirpath_arg; + + if (* tmp != '\0') path_count++; + while (* tmp != '\0') { + if (* tmp == '@') { + path_count++; + } + tmp++; + } + filenames = (char **)malloc(path_count * sizeof(char **)); + if (filenames == NULL) { + FAIL("out of memory"); + } + + token = strtok(dirpath_arg, delimiter_string); + while (token != NULL) { + filenames[i] = token; + token = strtok(NULL, delimiter_string); + i++; + } +} + +void unique_dir_access(int opt) { + if (opt == MK_UNI_DIR) { + MPI_Barrier(testcomm); + if (chdir(unique_chdir_dir) == -1) { + FAIL("Unable to chdir to unique test directory"); + } + } else if (opt == STAT_SUB_DIR) { + if (chdir(unique_stat_dir) == -1) { + FAIL("Unable to chdir to test directory"); + } + } else if (opt == READ_SUB_DIR) { + if (chdir(unique_read_dir) == -1) { + FAIL("Unable to chdir to test directory"); + } + } else if (opt == RM_SUB_DIR) { + if (chdir(unique_rm_dir) == -1) { + FAIL("Unable to chdir to test directory"); + } + } else if (opt == RM_UNI_DIR) { + if (chdir(unique_rm_uni_dir) == -1) { + FAIL("Unable to chdir to test directory"); + } + } +} + +/* helper for creating/removing items */ +void create_remove_items_helper(int dirs, + int create, char* path, int itemNum) { + + int i; + char curr_item[MAX_LEN]; + + for (i=0; i= 3 + && (itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0)) { + printf("create dir: %d\n", itemNum+i); + fflush(stdout); + } + + //create dirs + sprintf(curr_item, "%sdir.%s%d", path, mk_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("create dir : %s\n", curr_item); + fflush(stdout); + } + if (mkdir(curr_item, DIRMODE) == 
-1) { + FAIL("unable to create directory"); + } + + } else { + + if (rank == 0 && verbose >= 3 + && (itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0)) { + printf("remove dir: %d\n", itemNum+i); + fflush(stdout); + } + + //remove dirs + sprintf(curr_item, "%sdir.%s%d", path, rm_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("remove dir : %s\n", curr_item); + fflush(stdout); + } + if (rmdir(curr_item) == -1) { + FAIL("unable to remove directory"); + } + } + + } else { + + int fd; + if (create) { + + if (rank == 0 && verbose >= 3 + && (itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0)) { + printf("create file: %d\n", itemNum+i); + fflush(stdout); + } + + //create files + sprintf(curr_item, "%sfile.%s%d", path, mk_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("create file: %s\n", curr_item); + fflush(stdout); + } + if (collective_creates) { + if ((fd = open(curr_item, O_RDWR)) == -1) { + FAIL("unable to open file"); + } + } else { + if (shared_file) { + if ((fd = open(curr_item, + O_CREAT|O_RDWR, FILEMODE)) == -1) { + FAIL("unable to create file"); + } + } else { + if ((fd = creat(curr_item, FILEMODE)) == -1) { + FAIL("unable to create file"); + } + } + } + + if (write_bytes > 0) { + if (write(fd, write_buffer, write_bytes) != write_bytes) + FAIL("unable to write file"); + } + + if (sync_file && fsync(fd) == -1) { + FAIL("unable to sync file"); + } + + if (close(fd) == -1) { + FAIL("unable to close file"); + } + + } else { + + if (rank == 0 && verbose >= 3 + && (itemNum+i) % ITEM_COUNT==0 && (itemNum+i != 0)) { + printf("remove file: %d\n", itemNum+i); + fflush(stdout); + } + + //remove files + sprintf(curr_item, "%sfile.%s%d", path, rm_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("remove file: %s\n", curr_item); + fflush(stdout); + } + if (!(shared_file && rank != 0)) { + if (unlink(curr_item) == -1) { + FAIL("unable to unlink file"); + } + } + } + } + } +} + +/* helper function to do collective operations */ +void 
collective_helper(int dirs, int create, char* path, int itemNum) { + + int i; + char curr_item[MAX_LEN]; + for (i=0; i= 2) { + printf("create dir : %s\n", curr_item); + fflush(stdout); + } + if (mkdir(curr_item, DIRMODE) == -1) { + FAIL("unable to create directory"); + } + + } else { + + //remove dirs + sprintf(curr_item, "%sdir.%s%d", path, rm_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("remove dir : %s\n", curr_item); + fflush(stdout); + } + if (rmdir(curr_item) == -1) { + FAIL("unable to remove directory"); + } + } + + } else { + + int fd; + if (create) { + + //create files + sprintf(curr_item, "%sfile.%s%d", path, mk_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("create file: %s\n", curr_item); + fflush(stdout); + } + if ((fd = creat(curr_item, FILEMODE)) == -1) { + FAIL("unable to create file"); + } + if (close(fd) == -1) { + FAIL("unable to close file"); + } + + } else { + + //remove files + sprintf(curr_item, "%sfile.%s%d", path, rm_name, itemNum+i); + if (rank == 0 && verbose >= 2) { + printf("remove file: %s\n", curr_item); + fflush(stdout); + } + if (!(shared_file && rank != 0)) { + if (unlink(curr_item) == -1) { + FAIL("unable to unlink file"); + } + } + } + } + } +} + +/* recusive function to create and remove files/directories from the + directory tree */ +void create_remove_items(int currDepth, int dirs, int create, int collective, + char *path, int dirNum) { + + int i; + char dir[MAX_LEN]; + memset(dir, 0, MAX_LEN); + + if (currDepth == 0) { + + /* create items at this depth */ + if (!leaf_only || (depth == 0 && leaf_only)) { + if (collective) { + collective_helper(dirs, create, dir, 0); + } else { + create_remove_items_helper(dirs, create, dir, 0); + } + } + + if (depth > 0) { + create_remove_items(++currDepth, dirs, create, + collective, dir, ++dirNum); + } + + } else if (currDepth <= depth) { + + char temp_path[MAX_LEN]; + strcpy(temp_path, path); + int currDir = dirNum; + + /* iterate through the branches */ + 
for (i=0; i= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { + printf("stat dir: %d\n", i); + fflush(stdout); + } + sprintf(item, "dir.%s%d", stat_name, item_num); + } else { + if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { + printf("stat file: %d\n", i); + fflush(stdout); + } + sprintf(item, "file.%s%d", stat_name, item_num); + } + + /* determine the path to the file/dir to be stat'ed */ + parent_dir = item_num / items_per_dir; + + if (parent_dir > 0) { //item is not in tree's root directory + + /* prepend parent directory to item's path */ + sprintf(temp, "%s.%d/%s", base_tree_name, parent_dir, item); + strcpy(item, temp); + + //still not at the tree's root dir + while (parent_dir > branch_factor) { + parent_dir = (int) ((parent_dir-1) / branch_factor); + sprintf(temp, "%s.%d/%s", base_tree_name, parent_dir, item); + strcpy(item, temp); + } + } + + /* below temp used to be hiername */ + if (rank == 0 && verbose >= 2) { + if (dirs) { + printf("stat dir : %s\n", item); + } else { + printf("stat file: %s\n", item); + } + fflush(stdout); + } + if (stat(item, &buf) == -1) { + if (dirs) { + FAIL("unable to stat directory"); + } else { + FAIL("unable to stat file"); + } + } + } +} + + +/* reads all of the items created as specified by the input parameters */ +void mdtest_read(int random, int dirs) { + + int i, parent_dir, item_num = 0; + int fd; + char item[MAX_LEN], temp[MAX_LEN]; + + /* allocate read buffer */ + if (read_bytes > 0) { + read_buffer = (char *)malloc(read_bytes); + if (read_buffer == NULL) { + FAIL("out of memory"); + } + } + + /* determine the number of items to read */ + int stop = 0; + if (leaf_only) { + stop = items_per_dir * pow(branch_factor, depth); + } else { + stop = items; + } + + /* iterate over all of the item IDs */ + for (i = 0; i < stop; i++) { + + memset(&item, 0, MAX_LEN); + memset(temp, 0, MAX_LEN); + + /* determine the item number to read */ + if (random) { + item_num = rand_array[i]; + } else { + item_num = i; + } + + /* 
make adjustments if in leaf only mode*/ + if (leaf_only) { + item_num += items_per_dir * + (num_dirs_in_tree - pow(branch_factor,depth)); + } + + /* create name of file to read */ + if (dirs) { + ; /* N/A */ + } else { + if (rank == 0 && verbose >= 3 && (i%ITEM_COUNT == 0) && (i != 0)) { + printf("read file: %d\n", i); + fflush(stdout); + } + sprintf(item, "file.%s%d", read_name, item_num); + } + + /* determine the path to the file/dir to be read'ed */ + parent_dir = item_num / items_per_dir; + + if (parent_dir > 0) { //item is not in tree's root directory + + /* prepend parent directory to item's path */ + sprintf(temp, "%s.%d/%s", base_tree_name, parent_dir, item); + strcpy(item, temp); + + //still not at the tree's root dir + while (parent_dir > branch_factor) { + parent_dir = (int) ((parent_dir-1) / branch_factor); + sprintf(temp, "%s.%d/%s", base_tree_name, parent_dir, item); + strcpy(item, temp); + } + } + + /* below temp used to be hiername */ + if (rank == 0 && verbose >= 2) { + if (dirs) { + ; + } else { + printf("read file: %s\n", item); + } + fflush(stdout); + } + + /* open file for reading */ + if ((fd = open(item, O_RDWR, FILEMODE)) == -1) { + FAIL("unable to open file"); + } + + /* read file */ + if (read_bytes > 0) { + if (read(fd, read_buffer, read_bytes) != read_bytes) + FAIL("unable to read file"); + } + + /* close file */ + if (close(fd) == -1) { + FAIL("unable to close file"); + } + } +} + +/* This method should be called by rank 0. 
It subsequently does all of + the creates and removes for the other ranks */ +void collective_create_remove(int create, int dirs, int ntasks) { + + int i; + char temp[MAX_LEN]; + + /* rank 0 does all of the creates and removes for all of the ranks */ + for (i=0; i 0) { + mdtest_stat(1, 1); + } else { + mdtest_stat(0, 1); + } + + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[2] = MPI_Wtime(); + + /* read phase */ + if (read_only) { + + if (unique_dir_per_task) { + unique_dir_access(READ_SUB_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 2); + } + } + + /* read directories */ + if (random_seed > 0) { + ; /* N/A */ + } else { + ; /* N/A */ + } + + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[3] = MPI_Wtime(); + + if (remove_only) { + if (unique_dir_per_task) { + unique_dir_access(RM_SUB_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 3); + } + } + } + + /* remove phase */ + if (remove_only) { + + /* remove directories */ + if (collective_creates) { + if (rank == 0) { + collective_create_remove(0, 1, ntasks); + } + } else { + create_remove_items(0, 1, 0, 0, NULL, 0); + } + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[4] = MPI_Wtime(); + + if (remove_only) { + if (unique_dir_per_task) { + unique_dir_access(RM_UNI_DIR); + } + } + if (unique_dir_per_task && !time_unique_dir_overhead) { + offset_timers(t, 4); + } + + MPI_Comm_size(testcomm, &size); + + /* calculate times */ + if (create_only) { + summary_table[iteration].entry[0] = items*size/(t[1] - t[0]); + } else { + summary_table[iteration].entry[0] = 0; + } + if (stat_only) { + summary_table[iteration].entry[1] = items*size/(t[2] - t[1]); + } else { + summary_table[iteration].entry[1] = 0; + } + if (read_only) { + summary_table[iteration].entry[2] = items*size/(t[3] - t[2]); + } else { + summary_table[iteration].entry[2] = 0; + } + if (remove_only) { + summary_table[iteration].entry[3] = items*size/(t[4] - t[3]); + } else { + summary_table[iteration].entry[3] = 0; 
+ } + + if (verbose >= 1 && rank == 0) { + printf(" Directory creation: %10.3f sec, %10.3f ops/sec\n", + t[1] - t[0], summary_table[iteration].entry[0]); + printf(" Directory stat : %10.3f sec, %10.3f ops/sec\n", + t[2] - t[1], summary_table[iteration].entry[1]); +/* N/A + printf(" Directory read : %10.3f sec, %10.3f ops/sec\n", + t[3] - t[2], summary_table[iteration].entry[2]); +*/ + printf(" Directory removal : %10.3f sec, %10.3f ops/sec\n", + t[4] - t[3], summary_table[iteration].entry[3]); + fflush(stdout); + } +} + +void file_test(int iteration, int ntasks) { + int size; + double t[5] = {0}; + + MPI_Barrier(testcomm); + t[0] = MPI_Wtime(); + + /* create phase */ + if (create_only) { + if (unique_dir_per_task) { + unique_dir_access(MK_UNI_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 0); + } + } + + /* "touch" the files */ + if (collective_creates) { + if (rank == 0) { + collective_create_remove(1, 0, ntasks); + } + MPI_Barrier(testcomm); + } + + /* create files */ + create_remove_items(0, 0, 1, 0, NULL, 0); + + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[1] = MPI_Wtime(); + + /* stat phase */ + if (stat_only) { + + if (unique_dir_per_task) { + unique_dir_access(STAT_SUB_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 1); + } + } + + /* stat files */ + if (random_seed > 0) { + mdtest_stat(1,0); + } else { + mdtest_stat(0,0); + } + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[2] = MPI_Wtime(); + + /* read phase */ + if (read_only) { + + if (unique_dir_per_task) { + unique_dir_access(READ_SUB_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 2); + } + } + + /* read files */ + if (random_seed > 0) { + mdtest_read(1,0); + } else { + mdtest_read(0,0); + } + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[3] = MPI_Wtime(); + + if (remove_only) { + if (unique_dir_per_task) { + unique_dir_access(RM_SUB_DIR); + if (!time_unique_dir_overhead) { + offset_timers(t, 3); + } + } + } + + /* remove phase */ + if 
(remove_only) { + if (collective_creates) { + if (rank == 0) { + collective_create_remove(0, 0, ntasks); + } + } else { + create_remove_items(0, 0, 0, 0, NULL, 0); + } + } + + if (barriers) { + MPI_Barrier(testcomm); + } + t[4] = MPI_Wtime(); + + if (remove_only) { + if (unique_dir_per_task) { + unique_dir_access(RM_UNI_DIR); + } + } + if (unique_dir_per_task && !time_unique_dir_overhead) { + offset_timers(t, 4); + } + + MPI_Comm_size(testcomm, &size); + + /* calculate times */ + if (create_only) { + summary_table[iteration].entry[4] = items*size/(t[1] - t[0]); + } else { + summary_table[iteration].entry[4] = 0; + } + if (stat_only) { + summary_table[iteration].entry[5] = items*size/(t[2] - t[1]); + } else { + summary_table[iteration].entry[5] = 0; + } + if (read_only) { + summary_table[iteration].entry[6] = items*size/(t[3] - t[2]); + } else { + summary_table[iteration].entry[6] = 0; + } + if (remove_only) { + summary_table[iteration].entry[7] = items*size/(t[4] - t[3]); + } else { + summary_table[iteration].entry[7] = 0; + } + + if (verbose >= 1 && rank == 0) { + printf(" File creation : %10.3f sec, %10.3f ops/sec\n", + t[1] - t[0], summary_table[iteration].entry[4]); + printf(" File stat : %10.3f sec, %10.3f ops/sec\n", + t[2] - t[1], summary_table[iteration].entry[5]); + printf(" File read : %10.3f sec, %10.3f ops/sec\n", + t[3] - t[2], summary_table[iteration].entry[6]); + printf(" File removal : %10.3f sec, %10.3f ops/sec\n", + t[4] - t[3], summary_table[iteration].entry[7]); + fflush(stdout); + } +} + +void print_help() { + char * opts[] = { +"Usage: mdtest [-b branching_factor] [-B] [-c] [-C] [-d testdir] [-D] [-e number_of_bytes_to_read]", +" [-E] [-f first] [-F] [-h] [-i iterations] [-I items_per_dir] [-l last] [-L]", +" [-n number_of_items] [-N stride_length] [-p seconds] [-r]", +" [-R[seed]] [-s stride] [-S] [-t] [-T] [-u] [-v]", +" [-V verbosity_value] [-w number_of_bytes_to_write] [-y] [-z depth]", +"\t-b: branching factor of hierarchical directory 
structure", +"\t-B: no barriers between phases", +"\t-c: collective creates: task 0 does all creates", +"\t-C: only create files/dirs", +"\t-d: the directory in which the tests will run", +"\t-D: perform test on directories only (no files)", +"\t-e: bytes to read from each file", +"\t-E: only read files/dir", +"\t-f: first number of tasks on which the test will run", +"\t-F: perform test on files only (no directories)", +"\t-h: prints this help message", +"\t-i: number of iterations the test will run", +"\t-I: number of items per directory in tree", +"\t-l: last number of tasks on which the test will run", +"\t-L: files only at leaf level of tree", +"\t-n: every process will creat/stat/read/remove # directories and files", +"\t-N: stride # between neighbor tasks for file/dir operation (local=0)", +"\t-p: pre-iteration delay (in seconds)", +"\t-r: only remove files or directories left behind by previous runs", +"\t-R: randomly stat files (optional argument for random seed)", +"\t-s: stride between the number of tasks for each test", +"\t-S: shared file access (file only, no directories)", +"\t-t: time unique working directory overhead", +"\t-T: only stat files/dirs", +"\t-u: unique working directory for each task", +"\t-v: verbosity (each instance of option increments by one)", +"\t-V: verbosity value", +"\t-w: bytes to write to each file after it is created", +"\t-y: sync file after writing", +"\t-z: depth of hierarchical directory structure", +"" +}; + int i, j; + + for (i = 0; strlen(opts[i]) > 0; i++) + printf("%s\n", opts[i]); + fflush(stdout); + + MPI_Initialized(&j); + if (j) { + MPI_Finalize(); + } + exit(0); +} + +void summarize_results(int iterations) { + char access[MAX_LEN]; + int i, j, k; + int start, stop, tableSize = 10; + double min, max, mean, sd, sum = 0, var = 0, curr = 0; + + double all[iterations * size * tableSize]; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Gather(&summary_table->entry[0], tableSize*iterations, + MPI_DOUBLE, all, 
tableSize*iterations, MPI_DOUBLE, + 0, MPI_COMM_WORLD); + + if (rank == 0) { + + printf("\nSUMMARY: (of %d iterations)\n", iterations); + printf( + " Operation Max Min Mean Std Dev\n"); + printf( + " --------- --- --- ---- -------\n"); + fflush(stdout); + + /* if files only access, skip entries 0-3 (the dir tests) */ + if (files_only && !dirs_only) { + start = 4; + } else { + start = 0; + } + + /* if directories only access, skip entries 4-7 (the file tests) */ + if (dirs_only && !files_only) { + stop = 4; + } else { + stop = 8; + } + + /* special case: if no directory or file tests, skip all */ + if (!dirs_only && !files_only) { + start = stop = 0; + } + + /* calculate aggregates */ + if (barriers) { + double maxes[iterations]; + + + /* Because each proc times itself, in the case of barriers we + * have to backwards calculate the time to simulate the use + * of barriers. + */ + for (i = start; i < stop; i++) { + for (j=0; j maxes[j]) { + min = maxes[j]; + } + if (max < maxes[j]) { + max = maxes[j]; + } + sum += maxes[j]; + } + mean = sum / iterations; + for (j=0; j curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; + } + } + mean = sum / (iterations * size); + for (k=0; k curr) { + min = curr; + } + if (max < curr) { + max = curr; + } + sum += curr; + } + mean = sum / (iterations); + for (j = 0; j < iterations; j++) { + var += pow((mean - summary_table[j].entry[i]), 2); + } + var = var / (iterations); + sd = sqrt(var); + switch (i) { + case 8: strcpy(access, "Tree creation :"); break; + case 9: strcpy(access, "Tree removal :"); break; + default: strcpy(access, "ERR"); break; + } + printf(" %s ", access); + printf("%10.3f ", max); + printf("%10.3f ", min); + printf("%10.3f ", mean); + printf("%10.3f\n", sd); + fflush(stdout); + sum = var = 0; + } + } +} + +/* Checks to see if the test setup is valid. If it isn't, fail. 
*/ +void valid_tests() { + + /* if dirs_only and files_only were both left unset, set both now */ + if (!dirs_only && !files_only) { + dirs_only = files_only = 1; + } + + /* if shared file 'S' access, no directory tests */ + if (shared_file) { + dirs_only = 0; + } + + /* check for collective_creates incompatibilities */ + if (shared_file && collective_creates && rank == 0) { + FAIL("-c not compatible with -S"); + } + if (path_count > 1 && collective_creates && rank == 0) { + FAIL("-c not compatible with multiple test directories"); + } + if (collective_creates && !barriers) { + FAIL("-c not compatible with -B"); + } + + /* check for shared file incompatibilities */ + if (unique_dir_per_task && shared_file && rank == 0) { + FAIL("-u not compatible with -S"); + } + + /* check multiple directory paths and strided option */ + if (path_count > 1 && nstride > 0) { + FAIL("cannot have multiple directory paths with -N strides between neighbor tasks"); + } + + /* check for shared directory and multiple directories incompatibility */ + if (path_count > 1 && unique_dir_per_task != 1) { + FAIL("shared directory mode is not compatible with multiple directory paths"); + } + + /* check if more directory paths than ranks */ + if (path_count > size) { + FAIL("cannot have more directory paths than MPI tasks"); + } + + /* check depth */ + if (depth < 0) { + FAIL("depth must be greater than or equal to zero"); + } + /* check branch_factor */ + if (branch_factor < 1 && depth > 0) { + FAIL("branch factor must be greater than or equal to zero"); + } + /* check for valid number of items */ + if ((items > 0) && (items_per_dir > 0)) { + FAIL("only specify the number of items or the number of items per directory"); + } + +} + +void show_file_system_size(char *file_system) { + char real_path[MAX_LEN]; + char file_system_unit_str[MAX_LEN] = "GiB"; + char inode_unit_str[MAX_LEN] = "Mi"; + long long int file_system_unit_val = 1024 * 1024 * 1024; + long long int inode_unit_val = 1024 * 1024; + 
long long int total_file_system_size, + free_file_system_size, + total_inodes, + free_inodes; + double total_file_system_size_hr, + used_file_system_percentage, + used_inode_percentage; + struct statfs status_buffer; + + if (statfs(file_system, &status_buffer) != 0) { + FAIL("unable to statfs() file system"); + } + + /* data blocks */ + total_file_system_size = status_buffer.f_blocks * status_buffer.f_bsize; + free_file_system_size = status_buffer.f_bfree * status_buffer.f_bsize; + used_file_system_percentage = (1 - ((double)free_file_system_size + / (double)total_file_system_size)) * 100; + total_file_system_size_hr = (double)total_file_system_size + / (double)file_system_unit_val; + if (total_file_system_size_hr > 1024) { + total_file_system_size_hr = total_file_system_size_hr / 1024; + strcpy(file_system_unit_str, "TiB"); + } + + /* inodes */ + total_inodes = status_buffer.f_files; + free_inodes = status_buffer.f_ffree; + used_inode_percentage = (1 - ((double)free_inodes/(double)total_inodes)) + * 100; + + /* show results */ + if (realpath(file_system, real_path) == NULL) { + FAIL("unable to use realpath()"); + } + fprintf(stdout, "Path: %s\n", real_path); + fprintf(stdout, "FS: %.1f %s Used FS: %2.1f%% ", + total_file_system_size_hr, file_system_unit_str, + used_file_system_percentage); + fprintf(stdout, "Inodes: %.1f %s Used Inodes: %2.1f%%\n", + (double)total_inodes / (double)inode_unit_val, + inode_unit_str, used_inode_percentage); + fflush(stdout); + + return; +} + +void display_freespace(char *testdirpath) +{ + char dirpath[MAX_LEN] = {0}; + int i; + int directoryFound = 0; + + strcpy(dirpath, testdirpath); + + /* get directory for outfile */ + i = strlen(dirpath); + while (i-- > 0) { + if (dirpath[i] == '/') { + dirpath[i] = '\0'; + directoryFound = 1; + break; + } + } + + /* if no directory/, use '.' 
*/ + if (directoryFound == 0) { + strcpy(dirpath, "."); + } + + show_file_system_size(dirpath); + + return; +} + +void create_remove_directory_tree(int create, + int currDepth, char* path, int dirNum) { + + int i; + char dir[MAX_LEN]; + + if (currDepth == 0) { + + sprintf(dir, "%s.%d/", base_tree_name, dirNum); + + if (create) { + if (rank == 0 && verbose >= 2) { + printf("making: %s\n", dir); + fflush(stdout); + } + if (mkdir(dir, DIRMODE) == -1) { + FAIL("Unable to create directory"); + } + } + + create_remove_directory_tree(create, ++currDepth, dir, ++dirNum); + + if (!create) { + if (rank == 0 && verbose >= 2) { + printf("remove: %s\n", dir); + fflush(stdout); + } + if (rmdir(dir) == -1) { + FAIL("Unable to remove directory"); + } + } + + } else if (currDepth <= depth) { + + char temp_path[MAX_LEN]; + strcpy(temp_path, path); + int currDir = dirNum; + + for (i=0; i= 2) { + printf("making: %s\n", temp_path); + fflush(stdout); + } + if (mkdir(temp_path, DIRMODE) == -1) { + FAIL("Unable to create directory"); + } + } + + create_remove_directory_tree(create, ++currDepth, + temp_path, (branch_factor*currDir)+1); + currDepth--; + + if (!create) { + if (rank == 0 && verbose >= 2) { + printf("remove: %s\n", temp_path); + fflush(stdout); + } + if (rmdir(temp_path) == -1) { + FAIL("Unable to remove directory"); + } + } + + strcpy(temp_path, path); + currDir++; + } + } +} + +int main(int argc, char **argv) { + int i, j, c; + int nodeCount; + MPI_Group worldgroup, testgroup; + struct { + int first; + int last; + int stride; + } range = {0, 0, 1}; + int first = 1; + int last = 0; + int stride = 1; + int iterations = 1; + + /* Check for -h parameter before MPI_Init so the mdtest binary can be + called directly, without, for instance, mpirun. 
*/ + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--help")) { + print_help(); + } + } + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + nodeCount = size / count_tasks_per_node(); + + if (rank == 0) { + printf("-- started at %s --\n\n", timestamp()); + printf("mdtest-%s was launched with %d total task(s) on %d nodes\n", + RELEASE_VERS, size, nodeCount); + fflush(stdout); + } + + if (rank == 0) { + fprintf(stdout, "Command line used:"); + for (i = 0; i < argc; i++) { + fprintf(stdout, " %s", argv[i]); + } + fprintf(stdout, "\n"); + fflush(stdout); + } + + /* Parse command line options */ + while (1) { + c = getopt(argc, argv, "b:BcCd:De:Ef:Fhi:I:l:Ln:N:p:rR::s:StTuvV:w:yz:"); + if (c == -1) { + break; + } + + switch (c) { + case 'b': + branch_factor = atoi(optarg); break; + case 'B': + barriers = 0; break; + case 'c': + collective_creates = 1; break; + case 'C': + create_only = 1; break; + case 'd': + parse_dirpath(optarg); break; + case 'D': + dirs_only = 1; break; + case 'e': + read_bytes = atoi(optarg); break; + case 'E': + read_only = 1; break; + case 'f': + first = atoi(optarg); break; + case 'F': + files_only = 1; break; + case 'h': + print_help(); break; + case 'i': + iterations = atoi(optarg); break; + case 'I': + items_per_dir = atoi(optarg); break; + case 'l': + last = atoi(optarg); break; + case 'L': + leaf_only = 1; break; + case 'n': + items = atoi(optarg); break; + case 'N': + nstride = atoi(optarg); break; + case 'p': + pre_delay = atoi(optarg); break; + case 'r': + remove_only = 1; break; + case 'R': + if (optarg == NULL) { + random_seed = time(NULL); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Bcast(&random_seed, 1, MPI_INT, 0, MPI_COMM_WORLD); + random_seed += rank; + } else { + random_seed = atoi(optarg)+rank; + } + break; + case 's': + stride = atoi(optarg); break; + case 'S': + shared_file = 1; break; + case 't': + time_unique_dir_overhead = 1; break; + case 
'T': + stat_only = 1; break; + case 'u': + unique_dir_per_task = 1; break; + case 'v': + verbose += 1; break; + case 'V': + verbose = atoi(optarg); break; + case 'w': + write_bytes = atoi(optarg); break; + case 'y': + sync_file = 1; break; + case 'z': + depth = atoi(optarg); break; + } + } + + if (!create_only && !stat_only && !read_only && !remove_only) { + create_only = stat_only = read_only = remove_only = 1; + } + + valid_tests(); + + /* setup total number of items and number of items per dir */ + if (depth <= 0) { + num_dirs_in_tree = 1; + } else { + if (branch_factor < 1) { + num_dirs_in_tree = 1; + } else if (branch_factor == 1) { + num_dirs_in_tree = depth + 1; + } else { + num_dirs_in_tree = + (1 - pow(branch_factor, depth+1)) / (1 - branch_factor); + } + } + if (items_per_dir > 0) { + items = items_per_dir * num_dirs_in_tree; + } else { + if (leaf_only) { + if (branch_factor <= 1) { + items_per_dir = items; + } else { + items_per_dir = items / pow(branch_factor, depth); + items = items_per_dir * pow(branch_factor, depth); + } + } else { + items_per_dir = items / num_dirs_in_tree; + items = items_per_dir * num_dirs_in_tree; + } + } + + /* initialize rand_array */ + if (random_seed > 0) { + srand(random_seed); + + int stop = 0; + if (leaf_only) { + stop = items_per_dir * pow(branch_factor, depth); + } else { + stop = items; + } + rand_array = (int*) malloc(stop * sizeof(int)); + + for (i=0; i1) { + n--; + int k = rand() % (n+1); + int tmp = rand_array[k]; + rand_array[k] = rand_array[n]; + rand_array[n] = tmp; + } + } + + /* allocate and initialize write buffer with # */ + if (write_bytes > 0) { + write_buffer = (char *)malloc(write_bytes); + if (write_buffer == NULL) { + FAIL("out of memory"); + } + memset(write_buffer, 0x23, write_bytes); + } + + /* setup directory path to work in */ + if (path_count == 0) { /* special case where no directory path provided with '-d' option */ + getcwd(testdirpath, MAX_LEN); + path_count = 1; + } else { + 
strcpy(testdirpath, filenames[rank%path_count]); + } + + /* display disk usage */ + if (rank == 0) display_freespace(testdirpath); + + if (rank == 0) { + if (random_seed > 0) { + printf("random seed: %d\n", random_seed); + } + } + + /* if directory does not exist, create it */ + if ((rank < path_count) && chdir(testdirpath) == -1) { + if (mkdir(testdirpath, DIRMODE) == - 1) { + FAIL("Unable to create test directory path"); + } + } + + if (gethostname(hostname, MAX_LEN) == -1) { + perror("gethostname"); + MPI_Abort(MPI_COMM_WORLD, 2); + } + if (last == 0) { + first = size; + last = size; + } + + /* setup summary table for recording results */ + summary_table = (table_t *)malloc(iterations * sizeof(table_t)); + if (summary_table == NULL) { + FAIL("out of memory"); + } + + if (unique_dir_per_task) { + sprintf(base_tree_name, "mdtest_tree.%d", rank); + } else { + sprintf(base_tree_name, "mdtest_tree"); + } + + /* start and end times of directory tree create/remove */ + double startCreate, endCreate; + + /* default use shared directory */ + strcpy(mk_name, "mdtest.shared."); + strcpy(stat_name, "mdtest.shared."); + strcpy(read_name, "mdtest.shared."); + strcpy(rm_name, "mdtest.shared."); + + MPI_Comm_group(MPI_COMM_WORLD, &worldgroup); + /* Run the tests */ + for (i = first; i <= last && i <= size; i += stride) { + range.last = i - 1; + MPI_Group_range_incl(worldgroup, 1, (void *)&range, &testgroup); + MPI_Comm_create(MPI_COMM_WORLD, testgroup, &testcomm); + if (rank == 0) { + if (files_only && dirs_only) { + printf("\n%d tasks, %d files/directories\n", i, i * items); + } else if (files_only) { + printf("\n%d tasks, %d files\n", i, i * items); + } else if (dirs_only) { + printf("\n%d tasks, %d directories\n", i, i * items); + } + } + if (rank == 0 && verbose >= 1) { + printf("\n"); + printf(" Operation Duration Rate\n"); + printf(" --------- -------- ----\n"); + } + for (j = 0; j < iterations; j++) { + if (rank == 0 && verbose >= 1) { + printf(" * iteration %d *\n", 
j+1); + fflush(stdout); + } + + strcpy(testdir, testdirpath); + strcat(testdir, "/"); + strcat(testdir, TEST_DIR); + sprintf(testdir, "%s.%d", testdir, j); + if ((rank < path_count) && chdir(testdir) == -1) { + if (mkdir(testdir, DIRMODE) == - 1) { + FAIL("Unable to create test directory"); + } + } + MPI_Barrier(MPI_COMM_WORLD); + if (chdir(testdir) == -1) { + FAIL("Unable to change to test directory"); + } + /* create hierarchical directory structure */ + MPI_Barrier(MPI_COMM_WORLD); + if (create_only) { + startCreate = MPI_Wtime(); + if (unique_dir_per_task) { + if (collective_creates && (rank == 0)) { + for (i=0; i= 1 && rank == 0) { + printf(" Tree creation : %10.3f sec, %10.3f ops/sec\n", + (endCreate - startCreate), summary_table[j].entry[8]); + fflush(stdout); + } + } else { + summary_table[j].entry[8] = 0; + } + sprintf(unique_mk_dir, "%s/%s.0", testdir, base_tree_name); + sprintf(unique_chdir_dir, "%s/%s.0", testdir, base_tree_name); + sprintf(unique_stat_dir, "%s/%s.0", testdir, base_tree_name); + sprintf(unique_read_dir, "%s/%s.0", testdir, base_tree_name); + sprintf(unique_rm_dir, "%s/%s.0", testdir, base_tree_name); + sprintf(unique_rm_uni_dir, "%s", testdir); + + if (!unique_dir_per_task) { + if (chdir(unique_mk_dir) == -1) { + FAIL("unable to change to shared tree directory"); + } + } + + if (rank < i) { + if (!shared_file) { + sprintf(mk_name, "mdtest.%d.", (rank+(0*nstride))%i); + sprintf(stat_name, "mdtest.%d.", (rank+(1*nstride))%i); + sprintf(read_name, "mdtest.%d.", (rank+(2*nstride))%i); + sprintf(rm_name, "mdtest.%d.", (rank+(3*nstride))%i); + } + if (unique_dir_per_task) { + sprintf(unique_mk_dir, "%s/mdtest_tree.%d.0", testdir, + (rank+(0*nstride))%i); + sprintf(unique_chdir_dir, "%s/mdtest_tree.%d.0", testdir, + (rank+(1*nstride))%i); + sprintf(unique_stat_dir, "%s/mdtest_tree.%d.0", testdir, + (rank+(2*nstride))%i); + sprintf(unique_read_dir, "%s/mdtest_tree.%d.0", testdir, + (rank+(3*nstride))%i); + sprintf(unique_rm_dir, 
"%s/mdtest_tree.%d.0", testdir, + (rank+(4*nstride))%i); + sprintf(unique_rm_uni_dir, "%s", testdir); + } + strcpy(top_dir, unique_mk_dir); + if (dirs_only && !shared_file) { + if (pre_delay) { + delay_secs(pre_delay); + } + directory_test(j, i); + } + if (files_only) { + if (pre_delay) { + delay_secs(pre_delay); + } + file_test(j, i); + } + } + + /* remove directory structure */ + if (!unique_dir_per_task) { + if (chdir(testdir) == -1) { + FAIL("unable to change to tree directory"); + } + } + MPI_Barrier(MPI_COMM_WORLD); + if (remove_only) { + startCreate = MPI_Wtime(); + if (unique_dir_per_task) { + if (collective_creates && (rank == 0)) { + for (i=0; i= 1 && rank == 0) { + printf(" Tree removal : %10.3f sec, %10.3f ops/sec\n", + (endCreate - startCreate), summary_table[j].entry[9]); + fflush(stdout); + } + } else { + summary_table[j].entry[9] = 0; + } + } + summarize_results(iterations); + if (i == 1 && stride > 1) { + i = 0; + } + } + + if (rank == 0) { + printf("\n-- finished at %s --\n", timestamp()); + fflush(stdout); + } + + if (random_seed > 0) { + free(rand_array); + } + + MPI_Finalize(); + exit(0); +} diff --git a/microbenchmarks/mdtest/scripts/WRAPPER_README b/microbenchmarks/mdtest/scripts/WRAPPER_README new file mode 100644 index 00000000..ec7e72fa --- /dev/null +++ b/microbenchmarks/mdtest/scripts/WRAPPER_README @@ -0,0 +1,49 @@ +======================== +mdtest_wrapper.py README +======================== + +mdtest_wrapper.py is a wrapper for mdtest that inserts the results into +a database. + +-------------- +Prerequisites: +-------------- + +Python 2.3.4 or higher +setuptools (Python module installer) +MySQLdb (a Python module) + + +------------- +Installation: +------------- + +---setuptools--- + +This is available http://pypi.python.org/pypi/setuptools. Installing +from source is probably the least hassle. 
+ +Unzip and untar the package +Change directories to the top level directory for setuptools +Run: python setup.py build +Run: python setup.py install --prefix=/some/install/directory + NOTE: --prefix arg is only necessary if you do not have + root permissions or you want to install the module into + some non-default directory +If you designated some non-default install location, then add that + directory to PYTHONPATH + + +---MySQLdb--- + +This is available at http://sourceforge.net/projects/mysql-python/. +Installing this module proceeds in the same manner as setuptools. + + +------ +Usage: +------ + +python mdtest_wrapper.py mpirun [mpirun args] ./mdtest [mdtest args] [--desc descriptionOfTest] + +NOTE: mdtest needs to be compiled before running mdtest_wrapper. diff --git a/microbenchmarks/mdtest/scripts/env_to_db.tcsh b/microbenchmarks/mdtest/scripts/env_to_db.tcsh new file mode 100755 index 00000000..eccafebb --- /dev/null +++ b/microbenchmarks/mdtest/scripts/env_to_db.tcsh @@ -0,0 +1,127 @@ +#! /bin/tcsh +# whatever this spits out in the form of # key val +# can be parsed by the fs_test and will be +# injected into the DB (so long as the key exists in the schema) +# the format is key [space] val +# currently the val can't have spaces in it... 
+# to pull this into the DB through the fs_test, set the +# FS_TEST_EXTRA environment variable to point to this file + + +# set up +set target = $1 + +# if the user specified an fs:/ notation for MPI-IO, then strip it +set target = `echo $target | sed 's/.*://g'` +set target_dir = $target:h +set tpf = $HOME/Testing/tpf/src/tpf_panfs.x + +# mpihome +echo "mpihome $MPIHOME" + +# segment +echo "segment $HOSTNAME" + +# user +echo "user $USER" + +# system +echo "system $HOSTNAME" + +# date_ts +set date_ts = `date +%s` +echo "date_ts $date_ts" + +# mpihost +if ( $?MY_MPI_HOST ) then + echo "mpihost $MY_MPI_HOST" +endif + +# os_version +set os_version = `uname -r` +echo "os_version $os_version"s + +# yyyymmdd +set yyyymmdd = `date +%F` +echo "yyyymmdd $yyyymmdd" + +# jobid +if ( $?PBS_JOBID ) then + echo "jobid $PBS_JOBID" +else if ( $?LFS_JOBID ) then + echo "jobid $LFS_JOB" +endif + +# mpi_version +echo "mpi_version $MPI_VERSION" + +# host list +#env | grep -i node +if ( $?PBS_NODEFILE ) then + set host_list = `cat $PBS_NODEFILE | tr '\n' ','` + echo "host_list $host_list" +endif + +# procs_per_node +if ( $?PBS_NODEFILE ) then + set shortname = `hostname -s` + set procs_per_node = `cat $PBS_NODEFILE | grep $shortname | wc -l` + echo "procs_per_node $procs_per_node" +endif + +# grab the ionode list +set ionodes = `/sbin/ip route | awk '/nexthop/ {print $3}' | sort | uniq` +set num_ionodes = `echo $ionodes | wc -w` +set ionodes = `echo "$ionodes" | tr ' ' ','` +echo "ionodes $ionodes" +echo "num_ionodes $num_ionodes" + +# grab the panfs mount options +# if panfs has multiple mounts, this might get the wrong one... 
+set panfs_mnt = `mount -t panfs | tr '\n' '|' | tr ' ' '_'` +echo "panfs_mnt $panfs_mnt" + +# get panfs client version +set panfs_trace1 = /usr/sbin/panfs_trace +set panfs_trace2 = /usr/local/sbin/panfs_trace +if ( -x $panfs_trace1 ) then + set client_version = `$panfs_trace1 --version $target_dir | awk '{print $4$5}' | head -1` + echo "panfs $client_version" +else if ( -x $panfs_trace2 ) then + set client_version = `$panfs_trace2 --version $target_dir | awk '{print $4$5}' | head -1` + echo "panfs $client_version" +else + echo "error couldnt_discover_panfs_version" +endif + +# get thread count +set thread_count = `ps auxw | grep kpanfs_thpool | grep -v grep | wc -l` +echo "panfs_threads $thread_count" + +# get df numbers +set df_perc = `df $target_dir -t panfs -P | tail -1 | awk '{print $5}' | sed s/%//` +set df_tot = `df $target_dir -t panfs -P | tail -1 | awk '{print $2}'` +echo "df_perc_before $df_perc" +echo "df_tot_before $df_tot" + +# grab tpf info +if ( "X$target" != "X" ) then + if ( -d $target_dir ) then + if ( -x $tpf ) then + $tpf default $target_dir |& awk \ + '/Components/ {print "panfs_comps "$5} \ + /RAID width/ {print "panfs_width "$3} \ + /Depth/ {print "panfs_depth "$2} \ + /Stride/ {print "panfs_stripe "$3} \ + /Layout Policy/ {print "panfs_visit "$3} \ + /Layout Type/ {print "panfs_type "$3} \ + ' + else + echo "error no_valid_tpf_executable" + endif + else + echo "error no_valid_target_dir_$target_dir" + endif +else + echo "error no_valid_target" +endif diff --git a/microbenchmarks/mdtest/scripts/mdtest_wrapper.py b/microbenchmarks/mdtest/scripts/mdtest_wrapper.py new file mode 100755 index 00000000..457da3df --- /dev/null +++ b/microbenchmarks/mdtest/scripts/mdtest_wrapper.py @@ -0,0 +1,533 @@ +#! /usr/bin/env python + +########################### mdtest_wrapper.py ############################## +# +#This program is a wrapper for mdtest. It will execute mdtest and parse the +#output. The result will then be inserted into a database. 
If the database +#doesn't exist, then the query is written to a file. +# +#To run this program, run the following command: +#python mdtest_wrapper.py mpirun [mpirun args] /path/to/mdtest [mdtest args] +# +#Written by: Ryan Kroiss +#Last modified: 07/24/2009 +# +############################################################################ + +import getopt,sys,os,array,string,time,user +import MySQLdb as db + +import sys + + + +def fail(message): + print message + sys.exit() + +### customized parsing method for mdtest ### +def parseArgs(args, db_dict): + + for i in range(0, len(args)): + if (args[i].startswith('-')): + set = False + o = args[i] + if (i+1 <= (len(args)-1)): + if (not args[i+1].startswith('-')): + a = args[i+1] + set = True + + if o == "-b": + if (not set): + fail("Improperly formatted arguments") + db_dict['branch_factor'] = a + elif o == "-B": + db_dict['no_barriers'] = 1 + elif o == "-c": + db_dict['collective_creates'] = 1 + elif o == "-C": + db_dict['create_only'] = 1 + elif o == "-d": + if (not set): + fail("Improperly formatted arguments") + db_dict['working_directory'] = a + elif o == '--desc': + continue + elif o == "-D": + db_dict['directories_only'] = 1 + elif o == "-f": + if (not set): + fail("Improperly formatted arguments") + db_dict['first_task'] = a + elif o == "-F": + db_dict['files_only'] = 1 + elif o == "-h": + continue + elif o == "-i": + if (not set): + fail("Improperly formatted arguments") + db_dict['iterations'] = a + elif o == "-I": + if (not set): + fail("Improperly formatted arguments") + db_dict['items_per_dir'] = a + elif o == "-l": + if (not set): + fail("Improperly formatted arguments") + db_dict['last_task'] = a + elif o == "-L": + db_dict['leaf_only'] = 1 + elif o == "-n": + if (not set): + fail("Improperly formatted arguments") + db_dict['items'] = a + elif o == "-N": + if (not set): + fail("Improperly formatted arguments") + db_dict['nstride'] = a + elif o == "-p": + if (not set): + fail("Improperly formatted 
arguments") + db_dict['pre_delay'] = a + elif o == "-r": + db_dict['remove_only'] = 1 + #elif o.startswith('-R'): + #don't do anything here because the random seed is caught in the output of the test + elif o == "-s": + if (not set): + fail("Improperly formatted arguments") + db_dict['stride'] = a + elif o == "-S": + db_dict['shared_file'] = 1 + elif o == "-t": + db_dict['time_unique_dir_overhead'] = 1 + elif o == "-T": + db_dict['stat_only'] = 1 + elif o == "-u": + db_dict['unique_dir_per_task'] = 1 + elif o == "-v": + continue + elif o == "-V": + continue + elif o == "-w": + if (not set): + fail("Improperly formatted arguments") + db_dict['write_bytes'] = a + elif o == "-y": + db_dict['sync_file'] = 1 + elif o == "-z": + if (not set): + fail("Improperly formatted arguments") + db_dict['depth'] = a + else: + if (not o.startswith('-R')): + print o + fail("Incorrect flag - check mdtest usage") + + return db_dict + + +###### creates db insert query from db_data dictionary +###### then executes query +def db_insert(dbconn, db_data): + + ###### create insert query ###### + query = "INSERT INTO mdtest (" + + count = 0 + + ### append column names to query ### + for key in db_data.keys(): + if (db_data.get(key) != None): + if (count == 1): + query += ',' + count = 1 + query += key + + query += ") VALUES ('" + count = 0 + + ### append values to query ### + for value in db_data.values(): + if (value != None): + if (count == 1): + query += "','" + count = 1 + query += str(value) + + query += "')" + + db_success=False + try: + ### connect to the database ### + raise SystemError # don't even bother, just dump to file + conn = db.connect(host="phpmyadmin",db="mpi_io_test_pro",user="cron", + passwd="hpciopwd") + cursor = conn.cursor() + + ### execute the query ### + cursor.execute(query) + + ### close connection ### + cursor.close() + conn.close() + + print "Query inserted into database" + db_success=True + + except: + + sql_file = os.getenv('HOME') + '/mdtest.sql_query' + + ### 
if unable to connect to db, print query to file sql_query ### + try: + f = open(sql_file,'a') + except: + f = open(sql_file,'w') + try: + f.write(query + ';\n') + f.close() + print "Appended query to file: %s" % sql_file + db_success=True + except: + print "Unable to append query to file: %s" % sql_file + + #finally: + + ### when all else fails print query to standard out ### + if db_success is False: print query + + + +def main(): + + ### check for minimum number of arguments ### + if (len(sys.argv) < 3): + print "Your command needs to have more that three arguments." + print "It should look something like this:" + print "python mdtest_wrapper.py mpirun..." + sys.exit() + + command_line =" ".join(sys.argv) + + ### find index of first arg of mdtest command ### + last = len(sys.argv) + description = None + env_to_db = None + last = len(sys.argv) + for a in sys.argv: + if (a == '--desc'): + index = sys.argv.index(a) + 1 + if (index < len(sys.argv)): + description = sys.argv[index] + last = last - 2 + if (a == '--env_to_db'): + index = sys.argv.index(a) + 1 + if (index < len(sys.argv)): + env_to_db = sys.argv[index] + last = last - 2 + + ### get command to execute ### + command = sys.argv[1] + for s in sys.argv[2:last]: + command += " " + s + + + ### run command and print db_data to standard out ### + walltime = int(time.time()) + p = os.popen(command) + mdtest_output = p.read() + walltime = int(time.time()) - walltime + print mdtest_output + + ###### set up dictionary of values ###### + db_data = dict() + + ###### keys for output ####### + db_data['user'] = None + db_data['system'] = None + db_data['date_ts'] = None + db_data['description'] = description + + ####### initialize mdtest parameters output ######## + db_data['collective_creates'] = None + db_data['working_directory'] = None + db_data['directories_only'] = None + db_data['files_only'] = None + db_data['first_task'] = None + db_data['last_task'] = None + db_data['iterations'] = None + db_data['items'] = 
None + db_data['items_per_dir'] = None + db_data['nstride'] = None + db_data['stride'] = None + db_data['pre_delay'] = None + db_data['remove_only'] = None + db_data['shared_file'] = None + db_data['time_unique_dir_overhead'] = None + db_data['unique_dir_per_task'] = None + db_data['write_bytes'] = None + db_data['sync_file'] = None + db_data['branch_factor'] = None + db_data['depth'] = None + db_data['random_stat'] = None + db_data['no_barriers'] = None + db_data['create_only'] = None + db_data['leaf_level'] = None + db_data['stat_only'] = None + + + ####### initialize mdtest environment output ####### + db_data['mdtest_version'] = None + db_data['num_tasks'] = None + db_data['num_nodes'] = None + db_data['command_line'] = command_line + db_data['path'] = None + db_data['fs_size'] = None + db_data['fs_used_pct'] = None + db_data['inodes_size'] = None + db_data['inodes_used_pct'] = None + db_data['walltime'] = str(walltime) + + ####### initialize mdtest operations output ######## + db_data['dir_create_max'] = None + db_data['dir_create_min'] = None + db_data['dir_create_mean'] = None + db_data['dir_create_stddev'] = None + db_data['dir_stat_max'] = None + db_data['dir_stat_min'] = None + db_data['dir_stat_mean'] = None + db_data['dir_stat_stddev'] = None + db_data['dir_remove_max'] = None + db_data['dir_remove_min'] = None + db_data['dir_remove_mean'] = None + db_data['dir_remove_stddev'] = None + db_data['file_create_max'] = None + db_data['file_create_min'] = None + db_data['file_create_mean'] = None + db_data['file_create_stddev'] = None + db_data['file_stat_max'] = None + db_data['file_stat_min'] = None + db_data['file_stat_mean'] = None + db_data['file_stat_stddev'] = None + db_data['file_remove_max'] = None + db_data['file_remove_min'] = None + db_data['file_remove_mean'] = None + db_data['file_remove_stddev'] = None + db_data['tree_create'] = None + db_data['tree_remove'] = None + + ######## initialize system output ######### + db_data['mpihome'] = None + 
db_data['mpihost'] = None + db_data['mpi_version'] = None + db_data['segment'] = None + db_data['os_version'] = None + db_data['yyyymmdd'] = None + db_data['jobid'] = None + db_data['host_list'] = None + db_data['panfs'] = None + db_data['panfs_srv'] = None + db_data['panfs_type'] = None + db_data['panfs_stripe'] = None + db_data['panfs_width'] = None + db_data['panfs_depth'] = None + db_data['panfs_comps'] = None + db_data['panfs_visit'] = None + db_data['panfs_mnt'] = None + db_data['panfs_threads'] = None + db_data['ionodes'] = None + db_data['num_ionodes'] = None + db_data['procs_per_node'] = None + + + ### set working_directory to cwd if user didn't specify one + if (db_data['working_directory'] == None): + db_data['working_directory'] = os.getcwd() + + ####### run env_to_db and parse output ###### + if (env_to_db is not None and os.path.exists(env_to_db)): + command = "%s %s" % (env_to_db, db_data['working_directory']) + p = os.popen(command) + env_result = p.read() + lines = env_result.splitlines() + for line in lines: + tokens = line.split() + if (len(tokens) >= 2): + if (tokens[0] == 'ionodes'): + db_data['ionodes'] = tokens[1] + elif (tokens[0] == 'num_ionodes'): + db_data['num_ionodes'] = tokens[1] + elif (tokens[0] == 'panfs_mnt'): + db_data['panfs_mnt'] = tokens[1] + elif (tokens[0] == 'panfs_type'): + db_data['panfs_type'] = tokens[1] + elif (tokens[0] == 'panfs_comps'): + db_data['panfs_comps'] = tokens[1] + elif (tokens[0] == 'panfs_stripe'): + db_data['panfs_stripe'] = tokens[1] + elif (tokens[0] == 'panfs_width'): + db_data['panfs_width'] = tokens[1] + elif (tokens[0] == 'panfs_depth'): + db_data['panfs_depth'] = tokens[1] + elif (tokens[0] == 'panfs_visit'): + db_data['panfs_visit'] = tokens[1] + elif (tokens[0] == 'mpihome'): + db_data['mpihome'] = tokens[1] + elif (tokens[0] == 'segment'): + db_data['segment'] = tokens[1] + elif (tokens[0] == 'user'): + db_data['user'] = tokens[1] + elif (tokens[0] == 'system'): + db_data['system'] = tokens[1] 
+ elif (tokens[0] == 'date_ts'): + db_data['date_ts'] = tokens[1] + elif (tokens[0] == 'mpihost'): + db_data['mpihost'] = tokens[1] + elif (tokens[0] == 'os_version'): + db_data['os_version'] = tokens[1] + elif (tokens[0] == 'yyyymmdd'): + db_data['yyyymmdd'] = tokens[1] + elif (tokens[0] == 'jobid'): + db_data['jobid'] = tokens[1] + elif (tokens[0] == 'mpi_version'): + db_data['mpi_version'] = tokens[1] + elif (tokens[0] == 'host_list'): + db_data['host_list'] = tokens[1] + elif (tokens[0] == 'procs_per_node'): + db_data['procs_per_node'] = tokens[1] + elif (tokens[0] == 'panfs_threads'): + db_data['panfs_threads'] = tokens[1] + elif (tokens[0] == 'panfs'): + db_data['panfs'] = tokens[1] + for i in range(len(tokens)-2): + db_data['panfs'] += " " + tokens[i+2] + + ###### get fs stats ###### + ### NOTE: this info could obtained by parsing output from mdtest + ### but it's both easier and more accurate to do it here + stats = os.statvfs(db_data['working_directory']) + + ### data blocks + total_fs_size = stats.f_blocks * stats.f_bsize + free_fs_size = stats.f_bfree * stats.f_bsize + used_fs_pct = (1 - (float(free_fs_size)/float(total_fs_size))) * 100 + db_data['fs_size'] = total_fs_size + db_data['fs_used_pct'] = used_fs_pct + + ### inodes + total_inodes = stats.f_files + free_inodes = stats.f_ffree + used_inodes_pct = (1 - (float(free_inodes)/float(total_inodes))) * 100 + db_data['inodes_size'] = total_inodes + db_data['inodes_used_pct'] = used_inodes_pct + + ###### parse output from mdtest and put in db_data dictionary ###### + lines = mdtest_output.splitlines() + for line in lines: + if (line.startswith('mdtest')): + line_toks = line.split(' ') + db_data['mdtest_version'] = line_toks[0] + first = True + for l in line_toks: + if (l.isdigit() and first): + db_data['num_tasks'] = l + first = False + elif (l.isdigit()): + db_data['num_nodes'] = l + elif (line.startswith('Path:')): + line_toks = line.split(':') + db_data['path'] = line_toks[1].strip() + elif 
(line.startswith('random')): + line_toks = line.split(':') + db_data['random_stat'] = line_toks[1].strip() + elif (line.startswith('tree creation rate')): + line_toks = line.split(':') + db_data['tree_create'] = line_toks[1].strip() + elif (line.startswith(" Directory creation:")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['dir_create_max'] = line_toks[i] + elif (i==(length-3)): + db_data['dir_create_min'] = line_toks[i] + elif (i==(length-2)): + db_data['dir_create_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['dir_create_stddev'] = line_toks[i] + elif (line.startswith(" Directory stat")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['dir_stat_max'] = line_toks[i] + elif (i==(length-3)): + db_data['dir_stat_min'] = line_toks[i] + elif (i==(length-2)): + db_data['dir_stat_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['dir_stat_stddev'] = line_toks[i] + elif (line.startswith(" Directory removal")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['dir_remove_max'] = line_toks[i] + elif (i==(length-3)): + db_data['dir_remove_min'] = line_toks[i] + elif (i==(length-2)): + db_data['dir_remove_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['dir_remove_stddev'] = line_toks[i] + elif (line.startswith(" File creation")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['file_create_max'] = line_toks[i] + elif (i==(length-3)): + db_data['file_create_min'] = line_toks[i] + elif (i==(length-2)): + db_data['file_create_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['file_create_stddev'] = line_toks[i] + elif (line.startswith(" File stat")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['file_stat_max'] = line_toks[i] + 
elif (i==(length-3)): + db_data['file_stat_min'] = line_toks[i] + elif (i==(length-2)): + db_data['file_stat_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['file_stat_stddev'] = line_toks[i] + elif (line.startswith(" File removal")): + line_toks = line.split() + length = len(line_toks) + for i in range(length): + if (i==(length-4)): + db_data['file_remove_max'] = line_toks[i] + elif (i==(length-3)): + db_data['file_remove_min'] = line_toks[i] + elif (i==(length-2)): + db_data['file_remove_mean'] = line_toks[i] + elif (i==(length-1)): + db_data['file_remove_stddev'] = line_toks[i] + elif (line.startswith('tree removal rate')): + line_toks = line.split(':') + db_data['tree_remove'] = line_toks[1].strip() + + + + db_insert(db,db_data) + + + + +if __name__ == "__main__": + main() + + diff --git a/microbenchmarks/mdtest/scripts/paramCatch.py b/microbenchmarks/mdtest/scripts/paramCatch.py new file mode 100644 index 00000000..dc146c0c --- /dev/null +++ b/microbenchmarks/mdtest/scripts/paramCatch.py @@ -0,0 +1,46 @@ +import MySQLdb as db,string + +def main(): + + ### change these variables to update the desired field ### + param = "-I" + field = "items_per_dir" + + print "Initiating database connection..." + d = db.connect(host="tangerine.lanl.gov", db="mpi_io_test") + print "Connected to database!" + cursor = d.cursor() + + print "Querying database..." + sql = "SELECT command_line,user,system,date_ts FROM mdtest WHERE !isnull(command_line)" + cursor.execute(sql) + print "Completed SELECT query!" + + data = cursor.fetchone() + + f = open("temp_query","w") + + print "Parsing query results..." 
#!/usr/bin/env python
#
# Tester for mdtest
#
#/*****************************************************************************\
#*                                                                            *
#* Copyright (c) 2003, The Regents of the University of California            *
#* See the file COPYRIGHT for a complete copyright notice and license.        *
#*                                                                            *
#\*****************************************************************************/
#
# CVS info:
#   $RCSfile: tester.py,v $
#   $Revision: 1.1.2.1 $
#   $Date: 2010/05/11 21:25:16 $
#   $Author: loewe6 $

import sys
import os.path
import string
import time

debug = 0

# definitions
RMPOOL = 'systest'
NODES = 1
TPN = 4
PROCS = NODES * TPN
EXECUTABLE = '/fs/home/bloewe/benchmarks/mdtest/mdtest'
TEST_DIR_LOC1 = '/panfs/REALM226/home/V1'
TEST_DIR_LOC3 = '/panfs/REALM226/home/V1@/panfs/REALM226/home/V2@/panfs/REALM226/home/V3'
TEST_DIRS = '/panfs/REALM226/home/V1 /panfs/REALM226/home/V2 /panfs/REALM226/home/V3'

# tests: each entry is one mdtest argument string, run in order by main()
tests = [

    # default
    "",

    # test directory
    "-d " + TEST_DIR_LOC1,

    # number of files per processor
    "-d " + TEST_DIR_LOC1 + " -n 3",

    # number of iterations of test
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2",

    # serially create before parallel access
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -c",

    # pre-test delay
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -p 1",

    # verbosity=1
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -v",

    # verbosity=2
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -v -v",

    # verbosity=3
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -V 3",

    # shared file
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -S",

    # read-your-neighbor
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -S -N " + str(TPN),

    # unique subdirectory
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u",

    # time unique subdirectory creation/deletion
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t",

    # directories only
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -D",

    # files only
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -F",

    # write 0 bytes
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -F -w 0",

    # write 1 byte
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -F -w 1",

    # write 0 bytes w/fsync
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -F -w 0 -y",

    # write 1 byte w/fsync
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -F -w 1 -y",

    # read-your-neighbor w/unique subdirectory
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -u -t -N " + str(TPN),

    # number of tasks to run
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -N " + str(TPN) + " -f 1 -l " \
    + str(PROCS-1) + " -s " + str(PROCS/3),

    # remove any remaining tests from previous run
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -N " + str(TPN) + " -f 1 -l " \
    + str(PROCS-1) + " -s " + str(PROCS/3) + " -r ",

    # test directories
    "-d " + TEST_DIR_LOC3,

    # number of files per processor
    "-d " + TEST_DIR_LOC3 + " -n 3",

    # number of iterations of test
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2",

    # pre-test delay
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -p 1",

    # verbosity=1
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -v",

    # verbosity=2
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -v -v",

    # verbosity=3
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -V 3",

    # shared file
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -S",

    # unique subdirectory
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u",

    # time unique subdirectory creation/deletion
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t",

    # directories only
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -D",

    # files only
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -F",

    # write 0 bytes
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -F -w 0",

    # write 1 byte
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -F -w 1",

    # write 0 bytes w/fsync
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -F -w 0 -y",

    # write 1 byte w/fsync
    "-d " + TEST_DIR_LOC3 + " -n 3 -i 2 -u -t -F -w 1 -y",

    # number of tasks to run
    "-d " + TEST_DIR_LOC1 + " -n 3 -i 2 -f 1 -l " \
    + str(PROCS-1) + " -s " + str(PROCS/3)

]


#############################
# set environment variables #
#############################
def SetEnvironment(rmpool, nodes, procs):
    # Export the POE/scheduler variables that the launched job reads.
    os.environ['MP_RMPOOL'] = str(rmpool)
    os.environ['MP_NODES'] = str(nodes)
    os.environ['MP_PROCS'] = str(procs)
    return


#################
# flush to file #
#################
def Flush2File(resultsFile, text):
    # Write one line and flush immediately so partial results survive a
    # hung or killed test run.  (Parameter renamed from 'string', which
    # shadowed the imported string module.)
    resultsFile.write(text + '\n')
    resultsFile.flush()


###################
# run test script #
###################
def RunScript(resultsFile, test):
    # -- for poe -- command = "poe " + EXECUTABLE + " " + test
    command = "mpiexec -n " + str(PROCS) + " " + EXECUTABLE + " " + test
    if debug == 1:
        # debug mode: log the command instead of executing it
        Flush2File(resultsFile, command)
    else:
        # NOTE(review): os.popen4 is Python-2-only; subprocess.Popen with
        # stderr=STDOUT is the portable replacement if this is ever ported.
        childIn, childOut = os.popen4(command)
        childIn.close()
        # capture combined stdout/stderr line by line, dropping newlines
        for line in childOut:
            Flush2File(resultsFile, line[:-1])
        childOut.close()
    return


########
# main #
########
def main():
    # datestamped results file, e.g. ./results.txt-05.11.10
    # (strftime replaces the old shell-out to `date +%m.%d.%y`)
    resultsFile = open("./results.txt-" + time.strftime("%m.%d.%y"), "w")

    Flush2File(resultsFile, "Testing mdtest")

    # test -h option on one task
    SetEnvironment(RMPOOL, 1, 1)
    RunScript(resultsFile, '-h')

    # set environ and run tests
    SetEnvironment(RMPOOL, NODES, PROCS)
    for test in tests:
        time.sleep(0)  # delay any cleanup for previous test (currently a no-op)
        #os.system("rm -rf " + TEST_DIRS)  # cleanup TEST_DIRS between tests
        RunScript(resultsFile, test)

    Flush2File(resultsFile, "\nFinished testing mdtest")
    resultsFile.close()

if __name__ == "__main__":
    main()
permute_on: + - compilers + - mpis + - tpn + + variables: + numnodes: '1' + tpn: [1, 2, 4, 8, 16, 32, 36] + omp_num_threads: '1' + + run: + env: + GOMP_CPU_AFFINITY: '' + +xrds_ats5: + inherits_from: cts1_ats5 + + only_if: + "{{sys_name}}": ['crossroads', 'rocinante'] + + variables: + tpn: [8, 32, 56, 88, 112] + arch: "spr" + stream_array_size: 35e6 + omp_places: [cores, sockets] + omp_proc_bind: [true] + + schedule: + partition: 'hbm' + + build: + preamble: + #- 'module load friendly-testing' #'module rm craype-hugepages2M' + - 'module swap PrgEnv-${PE_ENV,,} PrgEnv-{{compilers.pe_env}}' + - 'module load {{compilers.name}}/{{compilers.version}}' + - 'module load {{mpis.name}}/{{mpis.version}}' + + run: + + preamble: + #- 'module load friendly-testing' #'module rm craype-hugepages2M' + - 'module swap PrgEnv-${PE_ENV,,} PrgEnv-{{compilers.pe_env}}' + - 'module load {{compilers.name}}/{{compilers.version}}' + - 'module load {{mpis.name}}/{{mpis.version}}' + + env: + GOMP_CPU_AFFINITY: '' \ No newline at end of file