From a457e0b2585d3e7b478460ced31f1798f8c34a65 Mon Sep 17 00:00:00 2001 From: Mike Bland Date: Sun, 3 Sep 2017 09:45:06 -0400 Subject: [PATCH] lib/path: Add path processing utilities Part of #184. Contains `@go.canonicalize_path` and `@go.realpath` functions not from the original `scripts/lib/walk` in mbland/dev-setup. Updates `walk_files` to `@go.walk_file_system` with a breadth-first search option. --- lib/path | 210 +++++++++++++++++++++++ tests/path-module/canonicalize-path.bats | 85 +++++++++ tests/path-module/realpath.bats | 123 +++++++++++++ tests/path-module/walk-file-system.bats | 126 ++++++++++++++ tests/path-module/walk-path-forward.bats | 79 +++++++++ 5 files changed, 623 insertions(+) create mode 100644 lib/path create mode 100644 tests/path-module/canonicalize-path.bats create mode 100644 tests/path-module/realpath.bats create mode 100644 tests/path-module/walk-file-system.bats create mode 100644 tests/path-module/walk-path-forward.bats diff --git a/lib/path b/lib/path new file mode 100644 index 0000000..39e6cad --- /dev/null +++ b/lib/path @@ -0,0 +1,210 @@ +#! /usr/bin/env bash +# +# File system path processing and traversal +# +# Exports: +# @go.canonicalize_path +# Removes all extra slashes from a path and resolves all relative components +# +# @go.walk_file_system +# Performs an operation on file system objects and recurses into directories +# +# @go.walk_path_forward +# Processes a file path from the first component to the last +# +# @go.realpath +# Translates a path into its canonical absolute path + +. "$_GO_USE_MODULES" 'validation' + +# Removes all extra slashes from a path and resolves all relative components +# +# This will reduce any consecutive string of slashes to a single slash, and trim +# a trailing slash if one remains. 
#
# Relative components are resolved textually, one path component at a time; the
# file system is never consulted, so symbolic links are not taken into account.
# `..` components that would climb above the root of an absolute path are
# dropped, while those that climb above the start of a relative path are kept
# as leading `..` components. A relative path that resolves to nothing yields
# `.`.
#
# Every local variable is prefixed with `__gcp_` so that it will not shadow the
# caller's result variable; do not pass a result variable name starting with
# that prefix.
#
# NOTE(review): An empty `path` yields `/`, because a slash is appended to the
# path before it is processed; confirm that this is intended.
#
# Arguments:
#   result_var_name:  Name of the variable into which the result will be stored
#   path:             Path to canonicalize
@go.canonicalize_path() {
  @go.validate_identifier_or_die 'Result variable name' "$1"

  # The appended slash guarantees that every component, including the last one,
  # is terminated by a slash, so the loop below needs no special final case.
  local __gcp_remaining="$2/"
  local __gcp_component
  local __gcp_root=''
  local __gcp_result=''
  local __gcp_depth=0

  if [[ "${__gcp_remaining:0:1}" == '/' ]]; then
    __gcp_root='/'
  fi

  # Only quoted parameter expansions are used to split the path, so components
  # containing glob characters or whitespace pass through untouched.
  while [[ -n "$__gcp_remaining" ]]; do
    __gcp_component="${__gcp_remaining%%/*}"
    __gcp_remaining="${__gcp_remaining#*/}"

    case "$__gcp_component" in
    ''|.)
      # Empty components come from extra slashes; `.` refers to the same place.
      ;;
    ..)
      if [[ "$__gcp_depth" -ne 0 ]]; then
        # Cancel the most recent named component. __gcp_depth counts only named
        # components, so a preserved leading `..` is never removed here.
        if [[ "$__gcp_result" == */* ]]; then
          __gcp_result="${__gcp_result%/*}"
        else
          __gcp_result=''
        fi
        __gcp_depth="$((__gcp_depth - 1))"
      elif [[ -z "$__gcp_root" ]]; then
        # Nothing left to cancel in a relative path: keep the `..`.
        __gcp_result+="${__gcp_result:+/}.."
      fi
      # Otherwise the path is absolute and already at the root: drop the `..`.
      ;;
    *)
      __gcp_result+="${__gcp_result:+/}$__gcp_component"
      __gcp_depth="$((__gcp_depth + 1))"
      ;;
    esac
  done

  if [[ -n "$__gcp_root" ]]; then
    printf -v "$1" '%s' "/$__gcp_result"
  else
    printf -v "$1" '%s' "${__gcp_result:-.}"
  fi
}

# Performs an operation on file system objects and recurses into directories
#
# Each call to `operation` receives a path to an existing file system object.
# Performs a depth-first search algorithm by default, recursing into each
# directory as it is encountered. The `--bfs` option changes this behavior to
# recurse into directories as the last step of the algorithm.
#
# The algorithm terminates when it finishes visiting all the file system objects
# reachable from the path arguments or when `operation` returns a nonzero value.
+# +# Options: +# --bfs: Breadth-first search; recurse into directories at end of algorithm +# +# Arguments: +# operation: Name of the function taking a file system path as an argument +# ...: Paths from which to begin walking the file system +# +# Returns: +# Zero if `operation` always returned zero and the algorithm finished +# Nonzero if the algorithm was terminated by a nonzero return from `operation` +@go.walk_file_system() { + local operation + local current + local do_bfs + local bfs_queue=() + + if [[ "$1" == '--bfs' ]]; then + do_bfs='true' + shift + fi + operation="$1" + + for current in "${@:2}"; do + if [[ -e "$current" ]] && ! "$operation" "$current"; then + return 1 + elif [[ -d "$current" ]]; then + if [[ -n "$do_bfs" ]]; then + bfs_queue+=("$current") + elif ! @go.walk_file_system "$operation" "$current"/*; then + return 1 + fi + fi + done + + for current in "${bfs_queue[@]}"; do + if ! @go.walk_file_system '--bfs' "$operation" "$current"/*; then + return 1 + fi + done +} + +# Processes a file path from the first component to the last +# +# The first call to `operation` receives the first component of the path as its +# argument. Each successive call to `operation` receives the previous path plus +# its child component. +# +# The algorithm terminates when the entire path has been processed or when +# `operation` returns a nonzero value. +# +# Arguments: +# operation: Name of the function taking a file system path as an argument +# path: Path to walk forward from first parent to last child +# +# Returns: +# Zero if `operation` always returned zero and the algorithm finished +# Nonzero if the algorithm was terminated by a nonzero return from `operation` +@go.walk_path_forward() { + local operation="$1" + local oldIFS="$IFS" + local IFS='/' + local components=($2) + local component + local current_path + + IFS="$oldIFS" + + for component in "${components[@]}"; do + current_path+="${component:-/}" + if ! 
"$operation" "$current_path"; then + return 1 + elif [[ -n "$component" ]]; then + current_path+='/' + fi + done +} + +# Translates a path into its canonical absolute path +# +# This will eliminate extra slashes and symbolic links. It will resolve relative +# path components (i.e. '.' and '..') up to the point at which the path actually +# exists in the file system. +# +# Arguments: +# result_var_name: Name of the variable to which the real path will be stored +# path: Path to resolve to a real path +@go.realpath() { + @go.validate_identifier_or_die 'Result variable name' "$1" + local __grp_path="$2" + local __grpi_real_prefix + local __grpi_real_dir + local __grp_orig_pwd="$PWD" + + cd -P "$PWD" >/dev/null + printf -v '__grpi_real_dir' '%s' "$PWD" + + while :; do + @go.canonicalize_path '__grp_path' "$__grp_path" + @go.walk_path_forward _@go.realpath_impl "$__grp_path" || : + + if [[ -z "$__grpi_real_prefix" ]]; then + __grp_path="/$__grp_path" + fi + printf -v "$1" '%s' "${__grpi_real_dir}${__grp_path#$__grpi_real_prefix}" + + if [[ ! -L "${!1}" ]]; then + break + fi + + # Use `ls` since it exists everywhere, `readlink` and `realpath` don't, and + # there's no way to get the target of a file symlink directly from Bash. + __grp_path="$(ls -l "${!1}")" + __grp_path="${__grp_path#* -> }" + if [[ "${__grp_path:0:1}" != '/' ]]; then + cd "$__grpi_real_dir" >/dev/null + fi + done + cd "$__grp_orig_pwd" >/dev/null +} + +# -------------------------------- +# IMPLEMENTATION - HERE BE DRAGONS +# +# None of the functions below this line are part of the public interface. +# -------------------------------- + +# Implementation helper for @go.realpath +# +# Globals: +# __grpi_real_prefix: Assigned last path value that exists in the file system +# __grpi_real_dir: Assigned absolute path of __grpi_real_prefix +# +# Arguments: +# path: Current path passed in by @go.walk_path_forward +_@go.realpath_impl() { + if ! 
cd -P "$1" >/dev/null 2>&1; then + return 1 + fi + printf -v '__grpi_real_prefix' '%s' "$1" + printf -v '__grpi_real_dir' '%s' "$PWD" + cd - >/dev/null 2>&1 +} diff --git a/tests/path-module/canonicalize-path.bats b/tests/path-module/canonicalize-path.bats new file mode 100644 index 0000000..2654509 --- /dev/null +++ b/tests/path-module/canonicalize-path.bats @@ -0,0 +1,85 @@ +#! /usr/bin/env bats + +load ../environment +load "$_GO_CORE_DIR/lib/path" + +setup() { + test_filter +} + +teardown() { + @go.remove_test_go_rootdir +} + +run_canonicalize_path() { + @go.create_test_go_script \ + '. "$_GO_USE_MODULES" "path"' \ + '@go.canonicalize_path "result" "$1"' \ + 'printf "%s\n" "$result"' + run "$TEST_GO_SCRIPT" "$1" +} + +@test "$SUITE: leaves a path unchanged" { + run_canonicalize_path '/foo/bar/baz' + assert_success '/foo/bar/baz' +} + +@test "$SUITE: leaves root path unchanged" { + run_canonicalize_path '/' + assert_success '/' +} + +@test "$SUITE: leaves relative current dir path unchanged" { + run_canonicalize_path '.' + assert_success '.' +} + +@test "$SUITE: leaves relative parent dir path unchanged" { + run_canonicalize_path '..' + assert_success '..' +} + +@test "$SUITE: removes extra root slashes, parents" { + run_canonicalize_path '//..///..////../////' + assert_success '/' +} + +@test "$SUITE: removes all extra slashes" { + run_canonicalize_path '//foo///bar////baz/////' + assert_success '/foo/bar/baz' +} + +@test "$SUITE: resolves a relative parent" { + run_canonicalize_path 'foo/bar/../baz' + assert_success 'foo/baz' +} + +@test "$SUITE: resolves multiple relative parents" { + run_canonicalize_path 'foo/bar/../../baz/quux/..' + assert_success 'baz' +} + +@test "$SUITE: resolves relative parents beyond beginning" { + run_canonicalize_path 'foo/bar/../../baz/quux/../../../..' + assert_success '../..' 
+} + +@test "$SUITE: resolves relative parents beyond the root" { + run_canonicalize_path '/foo/bar/../../../baz/../../quux/xyzzy/../../../plugh' + assert_success '/plugh' +} + +@test "$SUITE: resolves a relative self" { + run_canonicalize_path 'foo/bar/./baz' + assert_success 'foo/bar/baz' +} + +@test "$SUITE: resolves multiple relative selves" { + run_canonicalize_path 'foo/./bar/././baz/./././' + assert_success 'foo/bar/baz' +} + +@test "$SUITE: resolves multiple relative parents, selves" { + run_canonicalize_path 'foo/./bar/./.././.././baz/./quux/..' + assert_success 'baz' +} diff --git a/tests/path-module/realpath.bats b/tests/path-module/realpath.bats new file mode 100644 index 0000000..814942c --- /dev/null +++ b/tests/path-module/realpath.bats @@ -0,0 +1,123 @@ +#! /usr/bin/env bats + +load ../environment +load "$_GO_CORE_DIR/lib/path" + +setup() { + test_filter +} + +teardown() { + @go.remove_test_go_rootdir +} + +run_realpath() { + @go.create_test_go_script \ + '. "$_GO_USE_MODULES" "path"' \ + '@go.realpath "result" "$1"' \ + "if [[ \"\$PWD\" != '$TEST_GO_ROOTDIR' ]]; then" \ + " printf \"EXPECTED PWD: %s\n\" '$TEST_GO_ROOTDIR' >&2" \ + ' printf "ACTUAL PWD: %s\n" "$PWD" >&2' \ + ' exit 1' \ + 'fi' \ + 'printf "%s\n" "$result"' + + cd -P "$TEST_GO_ROOTDIR" >/dev/null + export REAL_TEST_GO_ROOTDIR="$PWD" + cd - >/dev/null + + run "$TEST_GO_SCRIPT" "$1" +} + +@test "$SUITE: resolves current directory to PWD" { + run_realpath '.' + assert_success "$REAL_TEST_GO_ROOTDIR" +} + +@test "$SUITE: resolves '..' to parent of PWD" { + run_realpath '..' 
+ assert_success "${REAL_TEST_GO_ROOTDIR%/*}" +} + +@test "$SUITE: leaves root directory unchanged" { + run_realpath '/' + assert_success '/' +} + +@test "$SUITE: resolves relative parents of root to root" { + run_realpath '//..///..////../////' + assert_success '/' +} + +@test "$SUITE: leaves nonexistent absolute directory unchanged" { + run_realpath '/foo/bar' + assert_success '/foo/bar' +} + +@test "$SUITE: resolves nonexistent directory to child of PWD" { + run_realpath 'foo/bar' + assert_success "$REAL_TEST_GO_ROOTDIR/foo/bar" +} + +@test "$SUITE: resolves nonexistent ./ directory to child of PWD" { + run_realpath './foo/bar' + assert_success "$REAL_TEST_GO_ROOTDIR/foo/bar" +} + +@test "$SUITE: resolves '..' in a nonexistent directory" { + run_realpath 'foo/bar/../baz' + assert_success "$REAL_TEST_GO_ROOTDIR/foo/baz" +} + +@test "$SUITE: resolves nonexistent absolute directory to itself" { + run_realpath '/foo/bar' + assert_success '/foo/bar' +} + +@test "$SUITE: resolves directory symlinks" { + skip_if_system_missing 'ln' + if [[ "$OSTYPE" == 'msys' ]]; then + skip "ln doesn't work like it normally does on MSYS2" + fi + + local dir_from_orig_path + local dir_from_symlink_path + + mkdir -p "$TEST_GO_ROOTDIR/foo" + ln -s "$TEST_GO_ROOTDIR/foo" "$TEST_GO_ROOTDIR/bar" + + run_realpath "$TEST_GO_ROOTDIR/foo" + assert_success + dir_from_orig_path="$output" + + run_realpath "$TEST_GO_ROOTDIR/bar" + assert_success + dir_from_symlink_path="$output" + assert_equal "$dir_from_orig_path" "$dir_from_symlink_path" +} + +@test "$SUITE: resolves file symlinks" { + skip_if_system_missing 'ln' + if [[ "$OSTYPE" == 'msys' ]]; then + skip "ln doesn't work like it normally does on MSYS2" + fi + + local file_from_orig_path + local file_from_symlink_path + + mkdir -p "$TEST_GO_ROOTDIR" + printf 'foo\n' >"$TEST_GO_ROOTDIR/foo" + ln -s "$TEST_GO_ROOTDIR/foo" "$TEST_GO_ROOTDIR/bar" + ln -s 'bar' "$TEST_GO_ROOTDIR/baz" + ln -s "$TEST_GO_ROOTDIR/baz" "$TEST_GO_ROOTDIR/quux" + ln -s 
'quux' "$TEST_GO_ROOTDIR/xyzzy" + + run_realpath "$TEST_GO_ROOTDIR/foo" + assert_success + file_from_orig_path="$output" + + run_realpath "$TEST_GO_ROOTDIR/bar" + assert_success + file_from_symlink_path="$output" + assert_equal "$file_from_orig_path" "$file_from_symlink_path" +} diff --git a/tests/path-module/walk-file-system.bats b/tests/path-module/walk-file-system.bats new file mode 100644 index 0000000..5e506bd --- /dev/null +++ b/tests/path-module/walk-file-system.bats @@ -0,0 +1,126 @@ +#! /usr/bin/env bats + +load ../environment +load "$_GO_CORE_DIR/lib/path" + +WALK_TEST_ROOT="$TEST_GO_ROOTDIR/walk-files-test" + +setup() { + test_filter +} + +teardown() { + @go.remove_test_go_rootdir +} + +run_walk_file_system() { + @go.create_test_go_script \ + '. "$_GO_USE_MODULES" "path"' \ + 'declare WALKED' \ + 'WALKED=()' \ + 'walk_callback() {' \ + ' WALKED+=("$1")' \ + ' if [[ "$1" == "$STOP_PATH" ]]; then' \ + ' return 1' \ + ' fi' \ + '}' \ + 'if [[ "$1" == "--bfs" ]]; then' \ + ' @go.walk_file_system --bfs walk_callback "${@:2}"' \ + 'else' \ + ' @go.walk_file_system walk_callback "$@"' \ + 'fi' \ + 'RESULT="$?"' \ + 'printf "%s\n" "${WALKED[@]#$WALK_TEST_ROOT/}"' \ + 'exit "$RESULT"' + WALK_TEST_ROOT="$WALK_TEST_ROOT" run "$TEST_GO_SCRIPT" "$@" +} + +create_walk_test_files() { + set "$DISABLE_BATS_SHELL_OPTIONS" + __create_walk_test_files "$@" + restore_bats_shell_options +} + +__create_walk_test_files() { + local files=("${@/#/$WALK_TEST_ROOT/}") + local dirs=("${files[@]%/*}") + local current + + create_bats_test_dirs "${dirs[@]#$TEST_GO_ROOTDIR/}" + + for current in "${files[@]}"; do + printf '%s\n' "${current##*/}" >"$current" + done +} + +@test "$SUITE: empty args" { + run_walk_file_system + assert_success '' +} + +@test "$SUITE: empty path" { + run_walk_file_system '' + assert_success '' +} + +@test "$SUITE: nonexistent file paths" { + run_walk_file_system 'foo' 'bar' 'baz' + assert_success '' +} + +@test "$SUITE: walk from root" { + create_walk_test_files 
'foo/bar' 'baz' 'quux/xyzzy' 'quux/plugh' + run_walk_file_system "$WALK_TEST_ROOT" + assert_success \ + "$WALK_TEST_ROOT" \ + 'baz' \ + 'foo' \ + 'foo/bar' \ + 'quux' \ + 'quux/plugh' \ + 'quux/xyzzy' +} + +@test "$SUITE: walk from root breadth-first" { + create_walk_test_files 'foo/bar' 'baz' 'quux/xyzzy' 'quux/plugh' + run_walk_file_system --bfs "$WALK_TEST_ROOT" + assert_success \ + "$WALK_TEST_ROOT" \ + 'baz' \ + 'foo' \ + 'quux' \ + 'foo/bar' \ + 'quux/plugh' \ + 'quux/xyzzy' +} + +@test "$SUITE: walk specific dirs and files" { + create_walk_test_files 'foo/bar' 'baz' 'quux/xyzzy' 'quux/plugh' + run_walk_file_system "$WALK_TEST_ROOT/foo" "$WALK_TEST_ROOT/quux/plugh" + assert_success \ + 'foo' \ + 'foo/bar' \ + 'quux/plugh' +} + +@test "$SUITE: terminating depth-first search returns nonzero" { + create_walk_test_files 'foo/bar' 'baz' 'quux/xyzzy' 'quux/plugh' + STOP_PATH="$WALK_TEST_ROOT/foo/bar" run_walk_file_system "$WALK_TEST_ROOT" + assert_failure \ + "$WALK_TEST_ROOT" \ + 'baz' \ + 'foo' \ + 'foo/bar' +} + +@test "$SUITE: terminating breadth-first search returns nonzero" { + create_walk_test_files 'foo/bar' 'baz' 'quux/xyzzy' 'quux/plugh' + STOP_PATH="$WALK_TEST_ROOT/foo/bar" \ + run_walk_file_system --bfs "$WALK_TEST_ROOT" + assert_failure \ + "$WALK_TEST_ROOT" \ + 'baz' \ + 'foo' \ + 'quux' \ + 'foo/bar' +} diff --git a/tests/path-module/walk-path-forward.bats b/tests/path-module/walk-path-forward.bats new file mode 100644 index 0000000..05e4db7 --- /dev/null +++ b/tests/path-module/walk-path-forward.bats @@ -0,0 +1,79 @@ +#! /usr/bin/env bats + +load ../environment +load "$_GO_CORE_DIR/lib/path" + +setup() { + test_filter +} + +teardown() { + @go.remove_test_go_rootdir +} + +run_walk_path_forward() { + @go.create_test_go_script \ + '. 
"$_GO_USE_MODULES" "path"' \ + 'declare WALKED' \ + 'WALKED=()' \ + 'walk_callback() {' \ + ' WALKED+=("$1")' \ + ' if [[ "$1" == "$STOP_PATH" ]]; then' \ + ' return 1' \ + ' fi' \ + '}' \ + '@go.walk_path_forward walk_callback "$1"' \ + 'RESULT="$?"' \ + 'printf "%s\n" "${WALKED[@]}"' \ + 'exit "$RESULT"' + run "$TEST_GO_SCRIPT" "$1" +} + +@test "$SUITE: empty path" { + run_walk_path_forward + assert_success '' +} + +@test "$SUITE: root path" { + run_walk_path_forward '/' + assert_success '/' +} + +@test "$SUITE: absolute path" { + run_walk_path_forward '/foo/bar/baz' + assert_success \ + '/' \ + '/foo' \ + '/foo/bar' \ + '/foo/bar/baz' +} + +@test "$SUITE: relative path" { + run_walk_path_forward 'foo/bar/baz' + assert_success \ + 'foo' \ + 'foo/bar' \ + 'foo/bar/baz' +} + +@test "$SUITE: stop walking and return nonzero when operation returns nonzero" { + STOP_PATH='foo/bar' run_walk_path_forward 'foo/bar/baz' + assert_failure \ + 'foo' \ + 'foo/bar' +} + +@test "$SUITE: path with adjacent slashes" { + run_walk_path_forward '/foo//bar///baz////' + assert_success \ + '/' \ + '/foo' \ + '/foo//' \ + '/foo//bar' \ + '/foo//bar//' \ + '/foo//bar///' \ + '/foo//bar///baz' \ + '/foo//bar///baz//' \ + '/foo//bar///baz///' \ + '/foo//bar///baz////' +}