# Lay out files using fio in the benchmark. (#1108)
## Description of change

This makes the benchmarks more self-contained, creating the state that
they need to run rather than relying on pre-created state (potentially
with different/unknown mount options and/or object properties).

It does change behaviour a little: previously the multi-thread tests all
used the same object, whereas now each thread uses its own object.
Arguably this is a more useful test, but it results in higher S3 usage.

Also note that the cache benchmark is unchanged in this commit, because
it makes assumptions about the filename used by the tests.

Removing the assumption that each test will operate on a single file
prepares us for future mixed read/write tests, and allows different fio
jobs to be run in parallel safely.

## Does this change impact existing behavior?

Yes, see above: previously the multi-thread tests all used the same
object, whereas now each thread uses its own object. Arguably this is a
more useful test, but it results in higher S3 usage.

## Does this change need a changelog entry in any of the crates?

No.

---

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license and I agree to the terms of
the [Developer Certificate of Origin
(DCO)](https://developercertificate.org/).

Signed-off-by: Andrew Peace <[email protected]>
Signed-off-by: Andy Peace <[email protected]>
adpeace authored Nov 13, 2024
1 parent f8ca2ba commit 1c6f819
Showing 5 changed files with 35 additions and 43 deletions.
1 change: 1 addition & 0 deletions doc/BENCHMARKING.md
```diff
@@ -42,6 +42,7 @@ You can use the following steps.
 export S3_BUCKET_NAME=bucket_name
 export S3_BUCKET_TEST_PREFIX=prefix_path/
 export S3_ENDPOINT_URL=endpoint_url # optional
+# these filenames only needed by cache benchmark
 export S3_BUCKET_BENCH_FILE=bench_file_name
 export S3_BUCKET_SMALL_BENCH_FILE=small_bench_file_name
 # to filter by job name; e.g. to only run small jobs
```
6 changes: 4 additions & 2 deletions mountpoint-s3/scripts/fio/read_latency/ttfb.fio
```diff
@@ -1,9 +1,11 @@
 [global]
 name=fs_bench
-bs=1B
+# large write block size not used as part of benchmark, but speeds up file layout
+bs=1B,1M
 
 [time_to_first_byte_read]
-size=1B
+size=1G
+io_limit=1B
 rw=read
 ioengine=sync
 fallocate=none
```
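
A note on the `bs=1B,1M` line above: fio's `bs` accepts comma-separated per-direction values (read,write), so the measured read still uses 1-byte blocks while the layout writes run at 1 MiB, and `io_limit` (an alias of `io_size`) caps the read at a single byte even though the laid-out file is 1 GiB. A hypothetical command-line equivalent of the job (the mount path is a placeholder):

```sh
# Reads use 1B blocks (time to first byte); layout writes use 1M blocks.
fio --name=time_to_first_byte_read --directory=/mnt/mountpoint --thread \
    --bs=1B,1M --size=1G --io_limit=1B --rw=read --ioengine=sync \
    --fallocate=none --eta=never
```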
52 changes: 19 additions & 33 deletions mountpoint-s3/scripts/fs_bench.sh
```diff
@@ -16,16 +16,6 @@ if [[ -z "${S3_BUCKET_TEST_PREFIX}" ]]; then
     exit 1
 fi
 
-if [[ -z "${S3_BUCKET_BENCH_FILE}" ]]; then
-    echo "Set S3_BUCKET_BENCH_FILE to run this benchmark"
-    exit 1
-fi
-
-if [[ -z "${S3_BUCKET_SMALL_BENCH_FILE}" ]]; then
-    echo "Set S3_BUCKET_SMALL_BENCH_FILE to run this benchmark"
-    exit 1
-fi
-
 if [[ -n "${S3_JOB_NAME_FILTER}" ]]; then
     echo "Will only run fio jobs which match $S3_JOB_NAME_FILTER"
 fi
@@ -54,9 +44,8 @@ mkdir -p ${results_dir}
 
 run_fio_job() {
     job_file=$1
-    bench_file=$2
-    mount_dir=$3
-    log_dir=$4
+    mount_dir=$2
+    log_dir=$3
 
     job_name=$(basename "${job_file}")
     job_name="${job_name%.*}"
@@ -73,7 +62,6 @@ run_fio_job() {
             --output=${results_dir}/${job_name}_iter${i}.json \
             --output-format=json \
             --directory=${mount_dir} \
-            --filename=${bench_file} \
             --eta=never \
             ${job_file}
         job_status=$?
@@ -117,12 +105,6 @@ run_benchmarks() {
     category=$1
     jobs_dir=mountpoint-s3/scripts/fio/$category
 
-    if [ $category == "read" ]; then
-        part_size="--part-size=16777216"
-    else
-        part_size=""
-    fi
-
    for job_file in "${jobs_dir}"/*.fio; do
 
        if ! should_run_job "${job_file}"; then
@@ -151,15 +133,21 @@
        rm -rf ${log_dir}
        mkdir -p ${log_dir}
 
-       # mount file system
+       # Mount file system first with a large part size if needed
+       if [[ $job_file != *small.fio ]]; then
+           part_size_option="--part-size=16777216"
+       else
+           unset part_size_option
+       fi
        set +e
        cargo run --quiet --release -- \
            ${S3_BUCKET_NAME} ${mount_dir} \
            --allow-delete \
            --allow-overwrite \
            --log-directory=${log_dir} \
            --prefix=${S3_BUCKET_TEST_PREFIX} \
-           $part_size \
+           --log-metrics \
+           $part_size_option \
            ${optional_args}
        mount_status=$?
        set -e
@@ -168,19 +156,17 @@
            exit 1
        fi
 
-       # set bench file
-       if [[ $category == "write" ]]; then
-           bench_file=${job_name}_${RANDOM}.dat
-       else
-           bench_file=${S3_BUCKET_BENCH_FILE}
-           # run against small file if the job file ends with small.fio
-           if [[ $job_file == *small.fio ]]; then
-               bench_file=${S3_BUCKET_SMALL_BENCH_FILE}
-           fi
-       fi
+       # Lay out files for the test:
+       echo >&2 Laying out files for $job_file
+       fio --thread \
+           --directory=${mount_dir} \
+           --create_only=1 \
+           --eta=never \
+           ${job_file}
 
        # run the benchmark
-       run_fio_job $job_file $bench_file $mount_dir $log_dir
+       echo >&2 Running $job_file
+       run_fio_job $job_file $mount_dir $log_dir
 
        # collect resource utilization metrics (peak memory usage)
        cargo run --bin mount-s3-log-analyzer ${log_dir} ${results_dir}/${job_name}_peak_mem.json ${job_name}
```
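
With the environment checks removed, a throughput run needs only the bucket variables; the bench-file variables now matter only to the cache benchmark. A hypothetical invocation (bucket name and prefix are placeholders):

```sh
export S3_BUCKET_NAME=my-benchmark-bucket
export S3_BUCKET_TEST_PREFIX=fs-bench/
# S3_BUCKET_BENCH_FILE / S3_BUCKET_SMALL_BENCH_FILE are no longer needed
# here; fs_bench.sh lays out its own files via fio --create_only=1.
./mountpoint-s3/scripts/fs_bench.sh
```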
1 change: 1 addition & 0 deletions mountpoint-s3/scripts/fs_cache_bench.sh
```diff
@@ -169,6 +169,7 @@ cache_benchmark () {
     cargo run --quiet --release -- \
         ${S3_BUCKET_NAME} ${mount_dir} \
         --allow-delete \
+        --allow-overwrite \
         --cache=${cache_dir} \
         --log-directory=${log_dir} \
         --prefix=${S3_BUCKET_TEST_PREFIX} \
```
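
The added `--allow-overwrite` matters because the layout pass now writes through the file system, and Mountpoint refuses to modify existing objects unless the flag is set. A standalone sketch of the mount flags the benchmarks rely on (bucket, mount point, and prefix are placeholders, not values from the scripts):

```sh
# --allow-delete and --allow-overwrite let fio delete and re-create its
# layout files on repeated runs through the mounted bucket.
mount-s3 my-benchmark-bucket /mnt/mountpoint \
    --allow-delete --allow-overwrite --prefix=fs-bench/
```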
18 changes: 10 additions & 8 deletions mountpoint-s3/scripts/fs_latency_bench.sh
```diff
@@ -142,6 +142,7 @@ for job_file in "${jobs_dir}"/*.fio; do
     # mount file system
     cargo run --release ${S3_BUCKET_NAME} ${mount_dir} \
         --allow-delete \
+        --allow-overwrite \
         --log-directory=$log_dir \
         --prefix=${S3_BUCKET_TEST_PREFIX} \
         --log-metrics \
@@ -152,19 +153,20 @@ for job_file in "${jobs_dir}"/*.fio; do
         exit 1
     fi
 
-    # set bench file
-    bench_file=${S3_BUCKET_BENCH_FILE}
-    # run against small file if the job file ends with small.fio
-    if [[ $job_file == *small.fio ]]; then
-        bench_file=${S3_BUCKET_SMALL_BENCH_FILE}
-    fi
+    # Lay out files for the test:
+    echo >&2 Laying out files for $job_file
+    fio --thread \
+        --directory=${mount_dir} \
+        --create_only=1 \
+        --eta=never \
+        ${job_file}
+
     # time to first byte should not be longer than 5 minutes
-    # run the benchmark
+    echo >&2 Running $job_file
     timeout 300s fio --thread \
         --output=${results_dir}/${job_name}.json \
         --output-format=json \
         --directory=${mount_dir} \
-        --filename=${bench_file} \
         ${job_file}
     job_status=$?
     if [ $job_status -ne 0 ]; then
```
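
One detail of the latency script worth keeping in mind: because fio runs under `timeout 300s`, a hang and an ordinary fio failure both surface through `job_status`. A sketch of telling them apart (not verbatim from the script; GNU coreutils `timeout` exits with status 124 when it kills the command, and `example_job.fio` is a placeholder):

```sh
timeout 300s fio --thread --eta=never example_job.fio
job_status=$?
if [ $job_status -eq 124 ]; then
    # 124 = the 300s limit was hit and fio was killed
    echo >&2 "fio timed out"
elif [ $job_status -ne 0 ]; then
    echo >&2 "fio exited with status $job_status"
fi
```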

1 comment on commit 1c6f819

@github-actions

⚠️ Performance Alert ⚠️

A possible performance regression was detected for benchmark 'Throughput Benchmark (S3 Standard)'.
The benchmark result of this commit is worse than the previous result by more than the alert threshold (2).

| Benchmark suite | Current: 1c6f819 | Previous: f8ca2ba | Ratio |
|---|---|---|---|
| random_read_four_threads_direct_io | 4.13369140625 MiB/s | 21.93291015625 MiB/s | 5.31 |
| random_read_four_threads | 4.064453125 MiB/s | 17.75322265625 MiB/s | 4.37 |
| random_read_direct_io | 0.95556640625 MiB/s | 3.196875 MiB/s | 3.35 |
| random_read | 1.139453125 MiB/s | 3.40947265625 MiB/s | 2.99 |

This comment was automatically generated by a workflow using github-action-benchmark.
