From ba00ab7bb1258660454cc383976c6aecbf472085 Mon Sep 17 00:00:00 2001 From: gareth Date: Fri, 27 Aug 2021 22:26:08 +1000 Subject: [PATCH 01/51] change name to match discussion --- src/CMakeLists.txt | 1 + src/Makefile.manual | 5 +- src/stdlib_selection.fypp | 249 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 253 insertions(+), 2 deletions(-) create mode 100644 src/stdlib_selection.fypp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b56d75b0e..9121948e4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,6 +11,7 @@ set(fppFiles stdlib_linalg_diag.fypp stdlib_linalg_outer_product.fypp stdlib_optval.fypp + stdlib_selection.fypp stdlib_sorting.fypp stdlib_sorting_ord_sort.fypp stdlib_sorting_sort.fypp diff --git a/src/Makefile.manual b/src/Makefile.manual index 7576cf3d8..e44fa75a3 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -12,6 +12,7 @@ SRCFYPP = \ stdlib_quadrature.fypp \ stdlib_quadrature_trapz.fypp \ stdlib_quadrature_simps.fypp \ + stdlib_selection.fypp \ stdlib_sorting.fypp \ stdlib_sorting_ord_sort.fypp \ stdlib_sorting_sort.fypp \ @@ -27,8 +28,8 @@ SRCFYPP = \ stdlib_stats_moment_scalar.fypp \ stdlib_stats_var.fypp \ stdlib_math.fypp \ - stdlib_math_linspace.fypp \ - stdlib_math_logspace.fypp \ + stdlib_math_linspace.fypp \ + stdlib_math_logspace.fypp \ stdlib_stats_distribution_PRNG.fypp \ stdlib_string_type.fypp \ stdlib_string_type_constructor.fypp \ diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp new file mode 100644 index 000000000..74443620a --- /dev/null +++ b/src/stdlib_selection.fypp @@ -0,0 +1,249 @@ +#:include "common.fypp" +! Specify kinds/types for the input array in quick_select and arg_quick_select +#:set ARRAY_KINDS_TYPES = INT_KINDS_TYPES + REAL_KINDS_TYPES +! The index arrays are of all INT_KINDS_TYPES + +module stdlib_quick_select +! +! This code was modified from a matlab implementation of "qselect" by Manolis +! Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect +! Below is the licence of qselect +! +! Copyright (c) 2018, Manolis Lourakis +! All rights reserved. +! +! Redistribution and use in source and binary forms, with or without +! modification, are permitted provided that the following conditions are met: +! +! * Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! * Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution +! * Neither the name of Foundation for Research and Technology - Hellas nor the +! names of its contributors may be used to endorse or promote products +! derived from this software without specific prior written permission. +! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +! + +use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp + +implicit none + +private + +public select, arg_select + +interface select + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("quick_select", 1, arraytype, arraykind, intkind) + module procedure ${name}$ + #:endfor + #:endfor +end interface + +interface arg_select + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("arg_quick_select", 1, arraytype, arraykind, intkind) + module procedure ${name}$ + #:endfor + #:endfor +end interface + +contains + + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("quick_select", 1, arraytype, arraykind, intkind) + subroutine ${name}$(a, k, kth_smallest, left, right) + !! quick_select - select the k-th smallest entry in a(:). + !! + !! Implements Hoares Quickselect algorithm + !! (https://en.wikipedia.org/wiki/Quickselect) + !! with the median-of-3 pivot strategy. + !! Operates in-place, avoiding sorting and recursion. + !! + !! Modified from a translation of "qselect" by Manolis Lourakis + !! https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect + !! + ${arraytype}$, intent(inout) :: a(:) + !! Array in which we seek the kth-smallest entry. + !! On output it will be partially sorted such that + !! `maxval(a(1:(k-1))) <= a(k) <= minval(a((k+1):size(a))).` + ${inttype}$, intent(in) :: k + !! We want the kth smallest entry. E.G. `k=1` leads to + !! `kth_smallest=min(a)`, and `k=size(a)` leads to + !! `kth_smallest=max(a)` + ${arraytype}$, intent(out) :: kth_smallest + !! On output contains the kth-smallest value of `a(:)` + ${inttype}$, intent(in), optional :: left, right + !! If we know that: + !! the kth-smallest entry of `a` is in `a(left:right)` + !! and also that: + !! `maxval(a(1:(left-1))) <= minval(a(left:right))` + !! and: + !! `maxval(a(left:right))) <= minval(a((right+1):size(a)))` + !! then one or both bounds can be specified to narrow the search. + !! The constraints are available if we have previously called the + !! subroutine with different `k` (because of how `a(:)` becomes + !! partially sorted, see documentation for `a(:)`). + + ${inttype}$ :: l, r, s, i, j, k_local + ${arraytype}$ :: pivot + + l = 1_${intkind}$ + if(present(left)) l = left + r = size(a) + if(present(right)) r = right + + if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & + r > size(a)) then + stop "quick_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + end if + + k_local = k - l + 1 + + do while(.true.) + s = (l+r)/2_${intkind}$ ! Deliberate integer division + if(a(s) < a(r)) call swap(a(s), a(r)) + if(a(s) < a(l)) call swap(a(s), a(l)) + if(a(r) < a(l)) call swap(a(r), a(l)) + pivot = a(r) ! Median + + i = l + do j = l, r-1 + if(a(j) <= pivot) then + call swap(a(i), a(j)) + i = i+1 + end if + end do + call swap(a(r), a(i)) + s = i-l+1 + if(k_local < s) then + r = i-1 + else if(k_local > s) then + l=i+1; k_local=k_local-s; + else + kth_smallest = a(i) + return + end if + end do + + contains + subroutine swap(a, b) + ${arraytype}$, intent(inout) :: a, b + ${arraytype}$ :: tmp + tmp = a; a = b; b = tmp + end subroutine + end subroutine + #:endfor + #:endfor + + + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("arg_quick_select", 1, arraytype, arraykind, intkind) + subroutine ${name}$(a, indx, k, kth_smallest, left, right) + !! arg_quick_select - find the index of the k-th smallest entry in `a(:)` + !! + !! Implements Hoares Quickselect algorithm + !! https://en.wikipedia.org/wiki/Quickselect) + !! with the median-of-3 pivot strategy. + !! Operates in-place, avoiding sorting and recursion. + !! + ${arraytype}$, intent(in) :: a(:) + !! Array in which we seek the kth-smallest entry. + ${inttype}$, intent(inout) :: indx(:) + !! Array of indices into `a(:)`. Must contain each integer + !! from `1:size(a)` exactly once. On output it will be partially + !! "sorted" such that + !! `all( a(indx(1:(k-1)))) <= a(indx(k)) ) .AND. + !! all( a(indx(k)) <= a(indx( (k+1):size(a) )) )`. + ${inttype}$, intent(in) :: k + !! We want index of the kth smallest entry. E.G. `k=1` leads to + !! `a(kth_smallest) = min(a)`, and `k=size(a)` leads to + !! `a(kth_smallest) = max(a)` + ${inttype}$, intent(out) :: kth_smallest + !! On output contains the index with the kth-smallest value of `a(:)` + ${inttype}$, intent(in), optional :: left, right + !! If we know that: + !! the kth-smallest entry of `a` is in `a(indx(left:right))` + !! and also that: + !! `maxval(a(indx(1:(left-1)))) <= minval(a(indx(left:right)))` + !! and: + !! `maxavl(a(indx(left:right))) <= minval(a(indx((right+1):size(a))))` + !! then one or both bounds can be specified to reduce the search + !! time. These constraints are available if we have previously + !! called the subroutine with a different `k` (due to the way that + !! `indx(:)` becomes partially sorted, see documentation for `indx(:)`). + + ${inttype}$ :: l, r, s, i, j, k_local + ${arraytype}$ :: pivot + + l = 1_${intkind}$ + if(present(left)) l = left + r = size(a) + if(present(right)) r = right + + if(size(a) /= size(indx)) then + stop "arg_quick_select must have size(a) == size(indx)" + end if + + if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & + r > size(a)) then + stop "arg_quick_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + end if + + k_local = k - l + 1 + + do while(.true.) + s = (l+r)/2_${intkind}$ ! Deliberate integer division + if(a(indx(s)) < a(indx(r))) call swap(indx(s), indx(r)) + if(a(indx(s)) < a(indx(l))) call swap(indx(s), indx(l)) + if(a(indx(r)) < a(indx(l))) call swap(indx(r), indx(l)) + pivot = a(indx(r)) ! Median + + i = l + do j = l, r-1 + if(a(indx(j)) <= pivot) then + call swap(indx(i), indx(j)) + i = i+1 + end if + end do + call swap(indx(r), indx(i)) + s = i-l+1 + if(k_local < s) then + r = i-1 + else if(k_local > s) then + l=i+1; k_local=k_local-s; + else + kth_smallest = indx(i) + return + end if + end do + + contains + subroutine swap(a, b) + ${inttype}$, intent(inout) :: a, b + ${inttype}$ :: tmp + tmp = a; a = b; b = tmp + end subroutine + end subroutine + #:endfor + #:endfor + +end module + + From 040fbcfe8e140072abc3816a577b003274006a5f Mon Sep 17 00:00:00 2001 From: gareth Date: Fri, 27 Aug 2021 23:25:19 +1000 Subject: [PATCH 02/51] adding tests --- src/stdlib_selection.fypp | 22 +-- src/tests/CMakeLists.txt | 1 + src/tests/Makefile.manual | 1 + src/tests/selection/CMakeLists.txt | 12 ++ src/tests/selection/Makefile.manual | 11 ++ src/tests/selection/common.fypp | 37 +++++ src/tests/selection/test_selection.fypp | 179 ++++++++++++++++++++++++ 7 files changed, 252 insertions(+), 11 deletions(-) create mode 100644 src/tests/selection/CMakeLists.txt create mode 100644 src/tests/selection/Makefile.manual create mode 100644 src/tests/selection/common.fypp create mode 100644 src/tests/selection/test_selection.fypp diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 74443620a..2b909f009 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -1,9 +1,9 @@ #:include "common.fypp" -! Specify kinds/types for the input array in quick_select and arg_quick_select +! Specify kinds/types for the input array in select and arg_select #:set ARRAY_KINDS_TYPES = INT_KINDS_TYPES + REAL_KINDS_TYPES ! The index arrays are of all INT_KINDS_TYPES -module stdlib_quick_select +module stdlib_selection ! ! This code was modified from a matlab implementation of "qselect" by Manolis ! Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect @@ -47,7 +47,7 @@ public select, arg_select interface select #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("quick_select", 1, arraytype, arraykind, intkind) + #:set name = rname("select", 1, arraytype, arraykind, intkind) module procedure ${name}$ #:endfor #:endfor @@ -56,7 +56,7 @@ end interface interface arg_select #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("arg_quick_select", 1, arraytype, arraykind, intkind) + #:set name = rname("arg_select", 1, arraytype, arraykind, intkind) module procedure ${name}$ #:endfor #:endfor @@ -66,9 +66,9 @@ contains #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("quick_select", 1, arraytype, arraykind, intkind) + #:set name = rname("select", 1, arraytype, arraykind, intkind) subroutine ${name}$(a, k, kth_smallest, left, right) - !! quick_select - select the k-th smallest entry in a(:). + !! select - select the k-th smallest entry in a(:). !! !! Implements Hoares Quickselect algorithm !! (https://en.wikipedia.org/wiki/Quickselect) @@ -110,7 +110,7 @@ contains if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then - stop "quick_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if k_local = k - l + 1 @@ -154,9 +154,9 @@ contains #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("arg_quick_select", 1, arraytype, arraykind, intkind) + #:set name = rname("arg_select", 1, arraytype, arraykind, intkind) subroutine ${name}$(a, indx, k, kth_smallest, left, right) - !! arg_quick_select - find the index of the k-th smallest entry in `a(:)` + !! arg_select - find the index of the k-th smallest entry in `a(:)` !! !! Implements Hoares Quickselect algorithm !! https://en.wikipedia.org/wiki/Quickselect) @@ -198,12 +198,12 @@ contains if(present(right)) r = right if(size(a) /= size(indx)) then - stop "arg_quick_select must have size(a) == size(indx)" + stop "arg_select must have size(a) == size(indx)" end if if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then - stop "arg_quick_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if k_local = k - l + 1 diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 01df5d678..36e442889 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -12,6 +12,7 @@ add_subdirectory(io) add_subdirectory(linalg) add_subdirectory(logger) add_subdirectory(optval) +add_subdirectory(selection) add_subdirectory(sorting) add_subdirectory(stats) add_subdirectory(string) diff --git a/src/tests/Makefile.manual b/src/tests/Makefile.manual index 3e801ad4b..c0e54ca91 100644 --- a/src/tests/Makefile.manual +++ b/src/tests/Makefile.manual @@ -6,6 +6,7 @@ all test clean: $(MAKE) -f Makefile.manual --directory=io $@ $(MAKE) -f Makefile.manual --directory=logger $@ $(MAKE) -f Makefile.manual --directory=optval $@ + $(MAKE) -f Makefile.manual --directory=selection $@ $(MAKE) -f Makefile.manual --directory=sorting $@ $(MAKE) -f Makefile.manual --directory=quadrature $@ $(MAKE) -f Makefile.manual --directory=stats $@ diff --git a/src/tests/selection/CMakeLists.txt b/src/tests/selection/CMakeLists.txt new file mode 100644 index 000000000..6b56c6d10 --- /dev/null +++ b/src/tests/selection/CMakeLists.txt @@ -0,0 +1,12 @@ +### Pre-process: .fpp -> .f90 via Fypp + +# Create a list of the files to be preprocessed +set(fppFiles + test_selection.fypp +) + +set(fyppFlags) + +fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) + +ADDTEST(selection) diff --git a/src/tests/selection/Makefile.manual b/src/tests/selection/Makefile.manual new file mode 100644 index 000000000..6054a359c --- /dev/null +++ b/src/tests/selection/Makefile.manual @@ -0,0 +1,11 @@ +SRCFYPP =\ + test_selection.fypp + +SRCGEN = $(SRCFYPP:.fypp=.f90) + +$(SRCGEN): %.f90: %.fypp common.fypp + fypp $(FYPPFLAGS) $< $@ + +PROGS_SRC = $(SRCGEN) + +include ../Makefile.manual.test.mk diff --git a/src/tests/selection/common.fypp b/src/tests/selection/common.fypp new file mode 100644 index 000000000..7c7c44c68 --- /dev/null +++ b/src/tests/selection/common.fypp @@ -0,0 +1,37 @@ +#:mute + +#! Real kinds to be considered during templating +#:set REAL_KINDS = ["sp", "dp", "qp"] + +#! Real types to be considered during templating +#:set REAL_TYPES = ["real({})".format(k) for k in REAL_KINDS] + +#! Collected (kind, type) tuples for real types +#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) + +#! Integer kinds to be considered during templating +#:set INT_KINDS = ["int8", "int16", "int32", "int64"] + +#! Integer types to be considered during templating +#:set INT_TYPES = ["integer({})".format(k) for k in INT_KINDS] + +#! Collected (kind, type) tuples for integer types +#:set INT_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES)) + +#! Generates a routine name from a generic name, rank, type and kind +#! +#! Args: +#! gname (str): Generic name +#! rank (integer): Rank if exist +#! type (str): Type of the input +#! kind (str): kind of inputs variable +#! suffix (str): other identifier (could be used for output type/kind) +#! +#! Returns: +#! A string with a new name +#! +#:def rname(gname, rank, type, kind, suffix='') + $:"{0}_{1}_{2}{3}_{2}{3}".format(gname, rank, type[0], kind) if suffix == '' else "{0}_{1}_{2}{3}_{4}".format(gname, rank, type[0], kind, suffix) +#:enddef + +#:endmute diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp new file mode 100644 index 000000000..cdecb7262 --- /dev/null +++ b/src/tests/selection/test_selection.fypp @@ -0,0 +1,179 @@ +#:include "common.fypp" +! Specify kinds/types for the input array in quick_select and arg_quick_select +#:set ARRAY_KINDS_TYPES = INT_KINDS_TYPES + REAL_KINDS_TYPES +! The index arrays are of all INT_KINDS_TYPES + + +program test_selection + + use stdlib_error, only: check + use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp + use stdlib_selection, only: select, arg_select + implicit none + + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_select", 1, arraytype, arraykind, intkind) + call ${name}$() + #:endfor + #:endfor + +contains + + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_select", 1, arraytype, arraykind, intkind) + subroutine ${name}$() + + ${inttype}$, parameter :: N = 10, Nr = min(HUGE(N)/2_int64, 100_int64), Nreps = 4 + ${arraytype}$ :: x(N), x_copy(N), mat(8), mat_copy(8), len1(1), len2(2), & + kth_smallest, random_vals(Nr), one = 1 + ${inttype}$ :: i, p, up_rank, down_rank, mid_rank + real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases + logical :: test1, test2, test3, any_failed + integer, parameter :: ip = ${intkind}$ + + ! x contains the numbers 1**2, 2**2, .... 10**2, with mixed-up order + x = (/( i**2, i=1, size(x) )/) + x(5:2:-1) = x(2:5) + x(10:8:-1) = x(8:10) + + ! Check that the ith-ranked entry of x really is i**2 + do i = 1, size(x) + x_copy = x + call select(x_copy, i, kth_smallest) + call check( (kth_smallest == i**2), " ${name}$: kth smallest entry should be i**2") + end do + + ! Check that it works when we specify "left" and know that the array + ! is partially sorted due to previous calls to quickselect + x_copy = x + do i = 1, size(x), 1 + call select(x_copy, i, kth_smallest, left=i) + call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with left specified") + end do + + ! Check that it works when we specify "right" and know that the array + ! is partially sorted due to previous calls to quickselect + x_copy = x + do i = size(x), 1, -1 + call select(x_copy, i, kth_smallest, right=i) + call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with right specified") + end do + + ! + ! Variants of test that came with the matlab documentation + ! + mat = one * [3, 2, 7, 4, 5, 1, 4, -1] + mat_copy = mat + call select(mat_copy, 1_ip, kth_smallest) + call check( kth_smallest == -1, " ${name}$: mat test 1") + mat_copy = mat + call select(mat_copy, 2_ip, kth_smallest) + call check( kth_smallest == 1, " ${name}$: mat test 2") + mat_copy = mat + call select(mat_copy, size(mat)+1_ip-4_ip, kth_smallest) + call check( kth_smallest == 4, " ${name}$: mat test 3") + mat_copy = mat + call select(mat_copy, 5_ip, kth_smallest) + call check( kth_smallest == 4, " ${name}$: mat test 4") + mat_copy = mat + call select(mat_copy, 6_ip, kth_smallest) + call check( kth_smallest == 4, " ${name}$: mat test 5") + mat_copy = mat + call select(mat_copy, 7_ip, kth_smallest) + call check( kth_smallest == 5, " ${name}$: mat test 6") + + ! Check it works for size(a) == 1 + len1(1) = -1 * one + call select(len1, 1_ip, kth_smallest) + call check(kth_smallest == -1, " ${name}$: array with size 1") + + ! Check it works for size(a) == 2 + len2 = [-3, -5]*one + call select(len2, 2_ip, kth_smallest) + call check(kth_smallest == -3, " ${name}$: array with size 2, test 1") + len2 = [-3, -5]*one + call select(len2, 1_ip, kth_smallest) + call check(kth_smallest == -5, " ${name}$: array with size 2, test 2") + len2 = [-1, -1]*one + call select(len2, 1_ip, kth_smallest) + call check(kth_smallest == -1, " ${name}$: array with size 2, test 3") + len2 = [-1, -1]*one + call select(len2, 2_ip, kth_smallest) + call check(kth_smallest == -1, " ${name}$: array with size 2, test 4") + + ! + ! Test using random data + ! + any_failed=.FALSE. + + ! Search for the pth-smallest rank, for all these p + ! (avoid end-points to enable constrained search tests) + do p = 3, Nr-2 + + ! Repeat for different random samples to try to expose any errors + do i = 1, Nreps + + ! Make random numbers of the correct type + call random_number(random_doubles) + random_vals = random_doubles * Nr + + call select(random_vals, p, kth_smallest) + + test1 = kth_smallest == random_vals(p) + test2 = all(random_vals(1:(p-1)) <= random_vals(p)) + test3 = all(random_vals(p) <= & + random_vals((p+1):size(random_vals))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search above 'p', providing 'left' + up_rank = p + (Nr-p)/2_ip ! Deliberate integer division + call select(random_vals, up_rank, kth_smallest, left=p) + + test1 = kth_smallest == random_vals(up_rank) + test2 = all(random_vals(1:(up_rank-1)) <= random_vals(up_rank)) + test3 = all(random_vals(up_rank) <= & + random_vals((up_rank+1):size(random_vals))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search below p, providing 'right' + down_rank = p - (p/2_ip) + call select(random_vals, down_rank, kth_smallest, right=p) + + test1 = kth_smallest == random_vals(down_rank) + test2 = all(random_vals(1:(down_rank-1)) <= & + random_vals(down_rank)) + test3 = all(random_vals(down_rank) <= & + random_vals((down_rank+1):size(random_vals))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search between up-ind and down-ind, proving left + ! and right. Make 'mid_rank' either above or below p + mid_rank = p - p/3_ip*mod(i,2_ip) + (Nr-p)/3_ip*(1_ip-mod(i,2_ip)) + call select(random_vals, mid_rank, kth_smallest, & + left=down_rank, right=up_rank) + + test1 = kth_smallest == random_vals(mid_rank) + test2 = all(random_vals(1:(mid_rank-1)) <= & + random_vals(mid_rank)) + test3 = all(random_vals(mid_rank) <= & + random_vals((mid_rank+1):size(random_vals))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + end do + end do + + call check( (.not. any_failed), " ${name}$: random number test failed ") + + + end subroutine + #:endfor + #:endfor + + +end program From 5a0bc69125376fab804f58d56938008dbf5307d0 Mon Sep 17 00:00:00 2001 From: gareth Date: Fri, 27 Aug 2021 23:29:57 +1000 Subject: [PATCH 03/51] whitespace --- src/stdlib_selection.fypp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 2b909f009..18354814c 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -5,7 +5,7 @@ module stdlib_selection ! -! This code was modified from a matlab implementation of "qselect" by Manolis +! This code was modified from a matlab implementation of "qselect" by Manolis ! Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect ! Below is the licence of qselect ! @@ -22,7 +22,7 @@ module stdlib_selection ! this list of conditions and the following disclaimer in the documentation ! and/or other materials provided with the distribution ! * Neither the name of Foundation for Research and Technology - Hellas nor the -! names of its contributors may be used to endorse or promote products +! names of its contributors may be used to endorse or promote products ! derived from this software without specific prior written permission. ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -51,7 +51,7 @@ interface select module procedure ${name}$ #:endfor #:endfor -end interface +end interface interface arg_select #:for arraykind, arraytype in ARRAY_KINDS_TYPES @@ -99,29 +99,29 @@ contains !! The constraints are available if we have previously called the !! subroutine with different `k` (because of how `a(:)` becomes !! partially sorted, see documentation for `a(:)`). - + ${inttype}$ :: l, r, s, i, j, k_local ${arraytype}$ :: pivot - + l = 1_${intkind}$ if(present(left)) l = left r = size(a) if(present(right)) r = right - + if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - + k_local = k - l + 1 - + do while(.true.) s = (l+r)/2_${intkind}$ ! Deliberate integer division if(a(s) < a(r)) call swap(a(s), a(r)) if(a(s) < a(l)) call swap(a(s), a(l)) if(a(r) < a(l)) call swap(a(r), a(l)) pivot = a(r) ! Median - + i = l do j = l, r-1 if(a(j) <= pivot) then @@ -140,7 +140,7 @@ contains return end if end do - + contains subroutine swap(a, b) ${arraytype}$, intent(inout) :: a, b @@ -188,33 +188,33 @@ contains !! time. These constraints are available if we have previously !! called the subroutine with a different `k` (due to the way that !! `indx(:)` becomes partially sorted, see documentation for `indx(:)`). - + ${inttype}$ :: l, r, s, i, j, k_local ${arraytype}$ :: pivot - + l = 1_${intkind}$ if(present(left)) l = left r = size(a) if(present(right)) r = right - + if(size(a) /= size(indx)) then stop "arg_select must have size(a) == size(indx)" end if - + if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - + k_local = k - l + 1 - + do while(.true.) s = (l+r)/2_${intkind}$ ! Deliberate integer division if(a(indx(s)) < a(indx(r))) call swap(indx(s), indx(r)) if(a(indx(s)) < a(indx(l))) call swap(indx(s), indx(l)) if(a(indx(r)) < a(indx(l))) call swap(indx(r), indx(l)) pivot = a(indx(r)) ! Median - + i = l do j = l, r-1 if(a(indx(j)) <= pivot) then @@ -233,7 +233,7 @@ contains return end if end do - + contains subroutine swap(a, b) ${inttype}$, intent(inout) :: a, b From 20b632335a024cdc1129f1f34f94d6eacbf4096e Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 00:37:56 +1000 Subject: [PATCH 04/51] add specs --- doc/specs/stdlib_selection.md | 258 ++++++++++++++++++++++++++++++++++ 1 file changed, 258 insertions(+) create mode 100644 doc/specs/stdlib_selection.md diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md new file mode 100644 index 000000000..ad2b6d376 --- /dev/null +++ b/doc/specs/stdlib_selection.md @@ -0,0 +1,258 @@ +--- +title: Selection Procedures +--- + +# The `stdlib_selection` module + +[TOC] + +## Overview of selection + +Suppose you wish to find the value of the kth-smallest entry in an array, or the index +of that value. While it could be done by sorting the whole array using `sort` or `sort_index` +from `stdlib_sorting` and then finding the k-th entry, that would require O(N x LOG(N)) +time. However selection of a single entry can be done in O(N) time, so is much faster for large arrays. +This is useful, for example, to quickly find the median of an array, or some other percentile. + +The Fortran Standard Library therefore provides a module, `stdlib_selection`, which implements +selection algorithms. + +## Overview of the module + +The module `stdlib_selection` defines two generic subroutines: +* `select` is used to find the kth-smallest entry of an array. The input +array is also modified in-place, and on return will be partially sorted +such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size(array)))`. +The user can optionally specify `left` and `right` indices to constrain the search +for the kth-smallest value. This can be useful if you have previously called `select` +to find a smaller or larger rank (that will have led to partial sorting of +`array`, thus implying some constraint on the location). + +* `arg_select` is used to find the index of the kth-smallest entry of an array. +In this case the input array is not modified, but the user must provide an +input index array with the same size as `array`, having unique indices from +`1:size(array)`, which is modified instead. On return the index array is modified +such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array(k+1:size(array)))`. +The user can optionally specify `left` and `right` indices to constrain the search +for the kth-smallest value. This can be useful if you have previously called `select` +to find a smaller or larger rank (that will have led to partial sorting of +`index`, thus implying some constraint on the location). + +#### Licensing + +The Fortran Standard Library is distributed under the MIT +License. However components of the library may be based on code with +additional licensing restrictions. In particular `select` and `arg_select` +were derived by modifying a matlab implementation of "qselect" by Manolis +Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect +Below is the licence of the matlab qselect + +Copyright (c) 2018, Manolis Lourakis +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution +* Neither the name of Foundation for Research and Technology - Hellas nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +### Specifications of the `stdlib_selection` procedures + +#### `select` - find the kth smallest value in an input array + +##### Status + +Experimental + +##### Description + +Returns the k-th smallest value of array `a(:)`, and also partially sorts `a(:)` +such that `all(a(1:k) <= a(k))` and `all(a(k) <= a((k+1):size(a)))` + +##### Syntax + +`call [[stdlib_selection(module):select(interface)]]( a, k, kth_smallest [, left, right ] )` + +##### Class + +Generic subroutine. + +##### Arguments + +`a` : shall be a rank one array of any of the types: +`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, +`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. On input it is +the array in which we search for the kth smallest entry. On return its elements +will be partially sorted such that: + `all( a(1:k-1) <= a(k) )` and `all(a(k) <= a(k+1:size(a)))`. + +`k`: shall be a scalar with any of the types: +`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It +is an `intent(in)` argument. We search for the `k`-th smallest entry of `a(:)`. + +`kth_smallest`: shall be a scalar with the same type as `a`. On return it contains +the k-th smallest entry of `a(:)`. + +`left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` +argument. If specified then we assume the k-th smallest value is definitely contained +in `a(left:size(a))`. If not present it is 1. This is typically useful if multiple calls +to `select` are made, because the partial sorting of `a` implies constraints on where +we need to search. + +`right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` +argument. If specified then we assume the k-th smallest value is definitely contained +in `a(1:right)`. If not present it is `size(a)`. This is typically useful if multiple calls +to `select` are made, because the partial sorting of `a` implies constraints on where +we need to search. + +##### Notes + +Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster +than sorting `a` entirely. + +##### Example + +```fortran + program demo_select + use stdlib_selection, only: select + implicit none + + real, allocatable :: array(:) + real :: kth_smallest + integer :: k, left, right + + array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] + + k = 2 + call select(array, k, kth_smallest) + print*, kth_smallest ! print 1 + + k = 7 + ! Due to the previous call to select, we know for sure this is in an + ! index >= 2 + call select(array, k, kth_smallest, left=2) + print*, kth_smallest ! print 5 + + k = 6 + ! Due to the previous two calls to select, we know for sure this is in + ! an index >= 2 and <= 7 + call select(array, k, kth_smallest, left=2, right=7) + print*, kth_smallest ! print 4 + + end program demo_select +``` + +#### `arg_select` - find the kth smallest value in an input array + +##### Status + +Experimental + +##### Description + +Returns the index of the k-th smallest value of array `a(:)`, and also partially sorts +the index-array `indx(:)` such that `all(a(indx(1:k)) <= a(indx(k)))` and +`all(a(indx(k)) <= a(indx((k+1):size(a))))` + +##### Syntax + +`call [[stdlib_selection(module):arg_select(interface)]]( a, indx, k, kth_smallest [, left, right ] )` + +##### Class + +Generic subroutine. + +##### Arguments + +`a` : shall be a rank one array of any of the types: +`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, +`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(in)` argument. On input it is +the array in which we search for the kth smallest entry. + +`indx`: shall be a rank one array with the same size as `a`, containing integers +from `1:size(a)` in any order. It is of any of the types: +`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It is an +`intent(inout)` argument. On return its elements will define a partial sorting of `a(:)` such that: + `all( a(indx(1:k-1)) <= a(indx(k)) )` and `all(a(indx(k)) <= a(indx(k+1:size(a))))`. + +`k`: shall be a scalar with the same type as `indx`. It is an `intent(in)` +argument. We search for the `k`-th smallest entry of `a(:)`. + +`kth_smallest`: a scalar with the same type as `indx`. It is an `intent(out)` argument, +and on return it contains the index of the k-th smallest entry of `a(:)`. + +`left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` +argument. If specified then we assume the k-th smallest value is definitely contained +in `a(indx(left:size(a)))`. If not present it is 1. This is typically useful if multiple calls +to `select` are made, because the partial sorting of `a` implies constraints on where +we need to search. + +`right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` +argument. If specified then we assume the k-th smallest value is definitely contained +in `a(indx(1:right))`. If not present it is `size(a)`. This is typically useful if multiple calls +to `select` are made, because the reordering of `indx` implies constraints on +where we need to search. + +##### Notes + +`arg_select` does not modify `a`, unlike `select`. + +While it is essential that that `indx` contains the integers 1:size(a), the code does not check for this. + +Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster +than sorting `a` entirely. + + +##### Example + + +```fortran + program demo_select + use stdlib_selection, only: arg_select + implicit none + + real, allocatable :: array(:) + integer, allocatable :: indx(:) + integer :: kth_smallest + integer :: k, left, right + + array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] + indx = (/( k, k = 1, size(array) )/) + + k = 2 + call arg_select(array, indx, k, kth_smallest) + print*, array(kth_smallest) ! print 1 + + k = 7 + ! Due to the previous call to arg_select, we know for sure this is in an + ! index >= 2 + call arg_select(array, indx, k, kth_smallest, left=2) + print*, array(kth_smallest) ! print 5 + + k = 6 + ! Due to the previous two calls to arg_select, we know for sure this is in + ! an index >= 2 and <= 7 + call arg_select(array, indx, k, kth_smallest, left=2, right=7) + print*, array(kth_smallest) ! print 4 + + end program demo_select +``` + From 4f8221f6bb2f2b7276fd89272386627c04db1b95 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 01:07:37 +1000 Subject: [PATCH 05/51] add in selection tests --- src/tests/selection/test_selection.fypp | 218 ++++++++++++++++++++++-- 1 file changed, 206 insertions(+), 12 deletions(-) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index cdecb7262..019fe853b 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -1,5 +1,5 @@ #:include "common.fypp" -! Specify kinds/types for the input array in quick_select and arg_quick_select +! Specify kinds/types for the input array in select and arg_select #:set ARRAY_KINDS_TYPES = INT_KINDS_TYPES + REAL_KINDS_TYPES ! The index arrays are of all INT_KINDS_TYPES @@ -18,6 +18,12 @@ program test_selection #:endfor #:endfor + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) + call ${name}$() + #:endfor + #:endfor contains #:for arraykind, arraytype in ARRAY_KINDS_TYPES @@ -25,8 +31,10 @@ contains #:set name = rname("test_select", 1, arraytype, arraykind, intkind) subroutine ${name}$() - ${inttype}$, parameter :: N = 10, Nr = min(HUGE(N)/2_int64, 100_int64), Nreps = 4 - ${arraytype}$ :: x(N), x_copy(N), mat(8), mat_copy(8), len1(1), len2(2), & + ${inttype}$, parameter :: N = 10, Nreps = 4, Nm = 8 + ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 100_int64) ! < HUGE(N) + + ${arraytype}$ :: x(N), x_copy(N), mat(Nm), mat_copy(Nm), len1(1), len2(2), & kth_smallest, random_vals(Nr), one = 1 ${inttype}$ :: i, p, up_rank, down_rank, mid_rank real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases @@ -34,12 +42,12 @@ contains integer, parameter :: ip = ${intkind}$ ! x contains the numbers 1**2, 2**2, .... 10**2, with mixed-up order - x = (/( i**2, i=1, size(x) )/) + x = (/( i**2, i=1, size(x, kind=ip) )/) x(5:2:-1) = x(2:5) x(10:8:-1) = x(8:10) ! Check that the ith-ranked entry of x really is i**2 - do i = 1, size(x) + do i = 1, size(x, kind=ip) x_copy = x call select(x_copy, i, kth_smallest) call check( (kth_smallest == i**2), " ${name}$: kth smallest entry should be i**2") @@ -48,7 +56,7 @@ contains ! Check that it works when we specify "left" and know that the array ! is partially sorted due to previous calls to quickselect x_copy = x - do i = 1, size(x), 1 + do i = 1, size(x, kind=ip), 1 call select(x_copy, i, kth_smallest, left=i) call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with left specified") end do @@ -56,7 +64,7 @@ contains ! Check that it works when we specify "right" and know that the array ! is partially sorted due to previous calls to quickselect x_copy = x - do i = size(x), 1, -1 + do i = size(x, kind=ip), 1, -1 call select(x_copy, i, kth_smallest, right=i) call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with right specified") end do @@ -72,7 +80,7 @@ contains call select(mat_copy, 2_ip, kth_smallest) call check( kth_smallest == 1, " ${name}$: mat test 2") mat_copy = mat - call select(mat_copy, size(mat)+1_ip-4_ip, kth_smallest) + call select(mat_copy, size(mat, kind=ip)+1_ip-4_ip, kth_smallest) call check( kth_smallest == 4, " ${name}$: mat test 3") mat_copy = mat call select(mat_copy, 5_ip, kth_smallest) @@ -124,7 +132,7 @@ contains test1 = kth_smallest == random_vals(p) test2 = all(random_vals(1:(p-1)) <= random_vals(p)) test3 = all(random_vals(p) <= & - random_vals((p+1):size(random_vals))) + random_vals((p+1):size(random_vals, kind=ip))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. @@ -135,7 +143,7 @@ contains test1 = kth_smallest == random_vals(up_rank) test2 = all(random_vals(1:(up_rank-1)) <= random_vals(up_rank)) test3 = all(random_vals(up_rank) <= & - random_vals((up_rank+1):size(random_vals))) + random_vals((up_rank+1):size(random_vals, kind=ip))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. @@ -147,7 +155,7 @@ contains test2 = all(random_vals(1:(down_rank-1)) <= & random_vals(down_rank)) test3 = all(random_vals(down_rank) <= & - random_vals((down_rank+1):size(random_vals))) + random_vals((down_rank+1):size(random_vals, kind=ip))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. @@ -161,7 +169,7 @@ contains test2 = all(random_vals(1:(mid_rank-1)) <= & random_vals(mid_rank)) test3 = all(random_vals(mid_rank) <= & - random_vals((mid_rank+1):size(random_vals))) + random_vals((mid_rank+1):size(random_vals, kind=ip))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. @@ -176,4 +184,190 @@ contains #:endfor + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) + subroutine ${name}$ + + ${inttype}$, parameter :: N = 10, Nreps = 4, Nm = 8 + ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 100_int64) ! < HUGE(N) + integer, parameter :: ip = ${intkind}$ + + ${arraytype}$ :: x(N), mat(Nm), len1(1), len2(2), random_vals(Nr), one=1 + + integer(ip) :: indx(N), indx_copy(N), indx_mat(Nm), indx_mat_copy(Nm), & + indx_len1(1), indx_len2(2), indx_r(Nr) + real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases + integer(ip) :: i, j, p, up_rank, down_rank, mid_rank, kth_smallest + logical :: test1, test2, test3, any_failed + + ! Make x contain 1**2, 2**2, .... 10**2, but mix up the order + x = (/( i**2, i=1, size(x, kind=ip) )/) + x(5:2:-1) = x(2:5) + x(10:8:-1) = x(8:10) + + indx = (/(i, i = 1, size(x, kind=ip))/) + + ! Check that the ith ranked entry of x equals i**2 + do i = 1, size(x, kind=ip) + indx_copy = indx + call arg_select(x, indx, i, kth_smallest) + call check(x(kth_smallest) == i**2, " ${name}$: kth smallest entry should be i**2") + end do + + ! Check that it works when we specify "left" and know that the index + ! array is partially sorted due to previous calls to arg_select + indx_copy = indx + do i = 1, size(x, kind=ip), 1 + call arg_select(x, indx_copy, i, kth_smallest, left=i) + call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with left specified") + end do + + ! Check that it works when we specify "right" and know that the index + ! array is partially sorted due to previous calls to arg_select + indx_copy = indx + do i = size(x, kind=ip), 1, -1 + call arg_select(x, indx_copy, i, kth_smallest, right=i) + call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with right specified") + end do + + ! + ! Variants of test that came with the matlab documentation for qselect + ! + mat = one * [3, 2, 7, 4, 5, 1, 4, -1] + indx_mat = (/( i, i = 1, size(mat, kind=ip) )/) + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, 1_ip, kth_smallest) + call check( mat(kth_smallest) == -1, " ${name}$: mat test 1") + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, 2_ip, kth_smallest) + call check( mat(kth_smallest) == 1, " ${name}$: mat test 2") + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, size(mat, kind=ip)+1_ip-4_ip, & + kth_smallest) + call check( mat(kth_smallest) == 4, " ${name}$: mat test 3") + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, 5_ip, kth_smallest) + call check( mat(kth_smallest) == 4, " ${name}$: mat test 4") + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, 6_ip, kth_smallest) + call check( mat(kth_smallest) == 4, " ${name}$: mat test 5") + + indx_mat_copy = indx_mat + call arg_select(mat, indx_mat_copy, 7_ip, kth_smallest) + call check( mat(kth_smallest) == 5, " ${name}$: mat test 6") + + ! Check it works for size(a) == 1 + len1(1) = -1 * one + indx_len1(1) = 1 + call arg_select(len1, indx_len1, 1_ip, kth_smallest) + call check(len1(kth_smallest) == -1, " ${name}$: array with size 1") + + ! Check it works for size(a) == 2 + len2 = [-3, -5] * one + indx_len2 = [1_ip, 2_ip] + call arg_select(len2, indx_len2, 2_ip, kth_smallest) + call check(len2(kth_smallest) == -3, " ${name}$: array with size 2, test 1") + + len2 = [-3, -5] * one + indx_len2 = [1_ip, 2_ip] + call arg_select(len2, indx_len2, 1_ip, kth_smallest) + call check(len2(kth_smallest) == -5, " ${name}$: array with size 2, test 2") + + len2 = [-1, -1] * one + indx_len2 = [1_ip, 2_ip] + call arg_select(len2, indx_len2, 1_ip, kth_smallest) + call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 3") + + len2 = [-1, -1] * one + indx_len2 = [1_ip, 2_ip] + call arg_select(len2, indx_len2, 2_ip, kth_smallest) + call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 4") + + ! + ! Test using random data + ! + any_failed=.FALSE. + + ! Search for the pth-smallest, for all these p (avoid end-points to + ! enable additional tests using "left", "right" arguments) + do p = 3, Nr-2 + + ! Repeat for many random samples to try to expose any errors + do i = 1, Nreps + + ! Make random numbers of the correct type + call random_number(random_doubles) + random_vals = random_doubles * Nr + + indx_r = (/( j, j = 1, size(random_vals, kind=ip) )/) + + ! Standard arg_select + call arg_select(random_vals, indx_r, p, kth_smallest) + + test1 = random_vals(kth_smallest) == random_vals(indx_r(p)) + test2 = all(random_vals(indx_r(1:(p-1))) <= & + random_vals(indx_r(p))) + test3 = all(random_vals(indx_r(p)) <= & + random_vals(indx_r((p+1):size(random_vals, kind=ip)))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search for a rank above 'p', providing 'left' + up_rank = p + (Nr-p)/2_ip ! Deliberate integer division + call arg_select(random_vals, indx_r, up_rank, & + kth_smallest, left=p) + + test1 = random_vals(kth_smallest) == & + random_vals(indx_r(up_rank)) + test2 = all(random_vals(indx_r(1:(up_rank-1))) <= & + random_vals(indx_r(up_rank))) + test3 = all(random_vals(indx_r(up_rank)) <= & + random_vals(indx_r((up_rank+1):size(random_vals, kind=ip)))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search for a rank below p, providing 'right' + down_rank = p - (p/2_ip) + call arg_select(random_vals, indx_r, down_rank, & + kth_smallest, right=p) + + test1 = random_vals(kth_smallest) == & + random_vals(indx_r(down_rank)) + test2 = all(random_vals(indx_r(1:(down_rank-1))) <= & + random_vals(indx_r(down_rank))) + test3 = all(random_vals(indx_r(down_rank)) <= & + random_vals(indx_r((down_rank+1):size(random_vals, kind=ip)))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + ! Constrained search for a rank between up-ind and down-ind, + ! proving left and right. 'mid_rank' is either above or below p + mid_rank = p - p/3_ip*mod(i,2_ip) + (Nr-p)/3_ip*(1_ip-mod(i,2_ip)) + call arg_select(random_vals, indx_r, mid_rank, & + kth_smallest, left=down_rank, right=up_rank) + + test1 = random_vals(kth_smallest) == & + random_vals(indx_r(mid_rank)) + test2 = all(random_vals(indx_r(1:(mid_rank-1))) <= & + random_vals(indx_r(mid_rank))) + test3 = all(random_vals(indx_r(mid_rank)) <= & + random_vals(indx_r((mid_rank+1):size(random_vals, kind=ip)))) + if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & + any_failed = .TRUE. + + end do + end do + + call check( (.not. any_failed), " ${name}$: random number test failed ") + + end subroutine + #:endfor + #:endfor + end program From 5ebea048ccc70dc8133b9dd12c9df2ef71889d9f Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 01:08:44 +1000 Subject: [PATCH 06/51] whitespace --- src/tests/selection/test_selection.fypp | 68 ++++++++++++------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index 019fe853b..d2017ed4c 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -188,7 +188,7 @@ contains #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) subroutine ${name}$ - + ${inttype}$, parameter :: N = 10, Nreps = 4, Nm = 8 ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 100_int64) ! < HUGE(N) integer, parameter :: ip = ${intkind}$ @@ -200,21 +200,21 @@ contains real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases integer(ip) :: i, j, p, up_rank, down_rank, mid_rank, kth_smallest logical :: test1, test2, test3, any_failed - + ! Make x contain 1**2, 2**2, .... 10**2, but mix up the order x = (/( i**2, i=1, size(x, kind=ip) )/) x(5:2:-1) = x(2:5) x(10:8:-1) = x(8:10) - + indx = (/(i, i = 1, size(x, kind=ip))/) - + ! Check that the ith ranked entry of x equals i**2 do i = 1, size(x, kind=ip) indx_copy = indx call arg_select(x, indx, i, kth_smallest) call check(x(kth_smallest) == i**2, " ${name}$: kth smallest entry should be i**2") end do - + ! Check that it works when we specify "left" and know that the index ! array is partially sorted due to previous calls to arg_select indx_copy = indx @@ -222,7 +222,7 @@ contains call arg_select(x, indx_copy, i, kth_smallest, left=i) call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with left specified") end do - + ! Check that it works when we specify "right" and know that the index ! array is partially sorted due to previous calls to arg_select indx_copy = indx @@ -230,86 +230,86 @@ contains call arg_select(x, indx_copy, i, kth_smallest, right=i) call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with right specified") end do - + ! ! Variants of test that came with the matlab documentation for qselect ! mat = one * [3, 2, 7, 4, 5, 1, 4, -1] indx_mat = (/( i, i = 1, size(mat, kind=ip) )/) - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 1_ip, kth_smallest) call check( mat(kth_smallest) == -1, " ${name}$: mat test 1") - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 2_ip, kth_smallest) call check( mat(kth_smallest) == 1, " ${name}$: mat test 2") - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, size(mat, kind=ip)+1_ip-4_ip, & kth_smallest) call check( mat(kth_smallest) == 4, " ${name}$: mat test 3") - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 5_ip, kth_smallest) call check( mat(kth_smallest) == 4, " ${name}$: mat test 4") - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 6_ip, kth_smallest) call check( mat(kth_smallest) == 4, " ${name}$: mat test 5") - + indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 7_ip, kth_smallest) call check( mat(kth_smallest) == 5, " ${name}$: mat test 6") - + ! Check it works for size(a) == 1 len1(1) = -1 * one indx_len1(1) = 1 call arg_select(len1, indx_len1, 1_ip, kth_smallest) call check(len1(kth_smallest) == -1, " ${name}$: array with size 1") - + ! Check it works for size(a) == 2 len2 = [-3, -5] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 2_ip, kth_smallest) call check(len2(kth_smallest) == -3, " ${name}$: array with size 2, test 1") - + len2 = [-3, -5] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 1_ip, kth_smallest) call check(len2(kth_smallest) == -5, " ${name}$: array with size 2, test 2") - + len2 = [-1, -1] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 1_ip, kth_smallest) call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 3") - + len2 = [-1, -1] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 2_ip, kth_smallest) call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 4") - + ! ! Test using random data ! any_failed=.FALSE. - + ! Search for the pth-smallest, for all these p (avoid end-points to ! enable additional tests using "left", "right" arguments) do p = 3, Nr-2 - + ! Repeat for many random samples to try to expose any errors do i = 1, Nreps - + ! Make random numbers of the correct type call random_number(random_doubles) random_vals = random_doubles * Nr - + indx_r = (/( j, j = 1, size(random_vals, kind=ip) )/) - + ! Standard arg_select call arg_select(random_vals, indx_r, p, kth_smallest) - + test1 = random_vals(kth_smallest) == random_vals(indx_r(p)) test2 = all(random_vals(indx_r(1:(p-1))) <= & random_vals(indx_r(p))) @@ -317,12 +317,12 @@ contains random_vals(indx_r((p+1):size(random_vals, kind=ip)))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. - + ! Constrained search for a rank above 'p', providing 'left' up_rank = p + (Nr-p)/2_ip ! Deliberate integer division call arg_select(random_vals, indx_r, up_rank, & kth_smallest, left=p) - + test1 = random_vals(kth_smallest) == & random_vals(indx_r(up_rank)) test2 = all(random_vals(indx_r(1:(up_rank-1))) <= & @@ -331,12 +331,12 @@ contains random_vals(indx_r((up_rank+1):size(random_vals, kind=ip)))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. - + ! Constrained search for a rank below p, providing 'right' down_rank = p - (p/2_ip) call arg_select(random_vals, indx_r, down_rank, & kth_smallest, right=p) - + test1 = random_vals(kth_smallest) == & random_vals(indx_r(down_rank)) test2 = all(random_vals(indx_r(1:(down_rank-1))) <= & @@ -345,13 +345,13 @@ contains random_vals(indx_r((down_rank+1):size(random_vals, kind=ip)))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. - + ! Constrained search for a rank between up-ind and down-ind, ! proving left and right. 'mid_rank' is either above or below p mid_rank = p - p/3_ip*mod(i,2_ip) + (Nr-p)/3_ip*(1_ip-mod(i,2_ip)) call arg_select(random_vals, indx_r, mid_rank, & kth_smallest, left=down_rank, right=up_rank) - + test1 = random_vals(kth_smallest) == & random_vals(indx_r(mid_rank)) test2 = all(random_vals(indx_r(1:(mid_rank-1))) <= & @@ -360,12 +360,12 @@ contains random_vals(indx_r((mid_rank+1):size(random_vals, kind=ip)))) if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & any_failed = .TRUE. - + end do end do - + call check( (.not. any_failed), " ${name}$: random number test failed ") - + end subroutine #:endfor #:endfor From a5a4005c0cf1b4bcbc24277306fe6fcc43e6ce1a Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 01:14:38 +1000 Subject: [PATCH 07/51] fix whitespace --- doc/specs/stdlib_selection.md | 48 +++++++++++++++++------------------ 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index ad2b6d376..3d0717471 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -10,8 +10,8 @@ title: Selection Procedures Suppose you wish to find the value of the kth-smallest entry in an array, or the index of that value. While it could be done by sorting the whole array using `sort` or `sort_index` -from `stdlib_sorting` and then finding the k-th entry, that would require O(N x LOG(N)) -time. However selection of a single entry can be done in O(N) time, so is much faster for large arrays. +from `stdlib_sorting` and then finding the k-th entry, that would require O(N x LOG(N)) +time. However selection of a single entry can be done in O(N) time, so is much faster for large arrays. This is useful, for example, to quickly find the median of an array, or some other percentile. The Fortran Standard Library therefore provides a module, `stdlib_selection`, which implements @@ -21,17 +21,17 @@ selection algorithms. The module `stdlib_selection` defines two generic subroutines: * `select` is used to find the kth-smallest entry of an array. The input -array is also modified in-place, and on return will be partially sorted +array is also modified in-place, and on return will be partially sorted such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search for the kth-smallest value. This can be useful if you have previously called `select` to find a smaller or larger rank (that will have led to partial sorting of `array`, thus implying some constraint on the location). -* `arg_select` is used to find the index of the kth-smallest entry of an array. -In this case the input array is not modified, but the user must provide an -input index array with the same size as `array`, having unique indices from -`1:size(array)`, which is modified instead. On return the index array is modified +* `arg_select` is used to find the index of the kth-smallest entry of an array. +In this case the input array is not modified, but the user must provide an +input index array with the same size as `array`, having unique indices from +`1:size(array)`, which is modified instead. On return the index array is modified such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array(k+1:size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search for the kth-smallest value. This can be useful if you have previously called `select` @@ -100,16 +100,16 @@ Generic subroutine. `a` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. On input it is -the array in which we search for the kth smallest entry. On return its elements +the array in which we search for the kth smallest entry. On return its elements will be partially sorted such that: `all( a(1:k-1) <= a(k) )` and `all(a(k) <= a(k+1:size(a)))`. `k`: shall be a scalar with any of the types: -`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It +`integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It is an `intent(in)` argument. We search for the `k`-th smallest entry of `a(:)`. `kth_smallest`: shall be a scalar with the same type as `a`. On return it contains -the k-th smallest entry of `a(:)`. +the k-th smallest entry of `a(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained @@ -134,10 +134,10 @@ than sorting `a` entirely. program demo_select use stdlib_selection, only: select implicit none - + real, allocatable :: array(:) real :: kth_smallest - integer :: k, left, right + integer :: k, left, right array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] @@ -152,7 +152,7 @@ than sorting `a` entirely. print*, kth_smallest ! print 5 k = 6 - ! Due to the previous two calls to select, we know for sure this is in + ! Due to the previous two calls to select, we know for sure this is in ! an index >= 2 and <= 7 call select(array, k, kth_smallest, left=2, right=7) print*, kth_smallest ! print 4 @@ -168,8 +168,8 @@ Experimental ##### Description -Returns the index of the k-th smallest value of array `a(:)`, and also partially sorts -the index-array `indx(:)` such that `all(a(indx(1:k)) <= a(indx(k)))` and +Returns the index of the k-th smallest value of array `a(:)`, and also partially sorts +the index-array `indx(:)` such that `all(a(indx(1:k)) <= a(indx(k)))` and `all(a(indx(k)) <= a(indx((k+1):size(a))))` ##### Syntax @@ -185,7 +185,7 @@ Generic subroutine. `a` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(in)` argument. On input it is -the array in which we search for the kth smallest entry. +the array in which we search for the kth smallest entry. `indx`: shall be a rank one array with the same size as `a`, containing integers from `1:size(a)` in any order. It is of any of the types: @@ -197,7 +197,7 @@ from `1:size(a)` in any order. It is of any of the types: argument. We search for the `k`-th smallest entry of `a(:)`. `kth_smallest`: a scalar with the same type as `indx`. It is an `intent(out)` argument, -and on return it contains the index of the k-th smallest entry of `a(:)`. +and on return it contains the index of the k-th smallest entry of `a(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained @@ -213,30 +213,30 @@ where we need to search. ##### Notes -`arg_select` does not modify `a`, unlike `select`. +`arg_select` does not modify `a`, unlike `select`. While it is essential that that `indx` contains the integers 1:size(a), the code does not check for this. Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster -than sorting `a` entirely. +than sorting `a` entirely. ##### Example ```fortran - program demo_select + program demo_arg_select use stdlib_selection, only: arg_select implicit none - + real, allocatable :: array(:) integer, allocatable :: indx(:) integer :: kth_smallest - integer :: k, left, right + integer :: k, left, right array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] indx = (/( k, k = 1, size(array) )/) - + k = 2 call arg_select(array, indx, k, kth_smallest) print*, array(kth_smallest) ! print 1 @@ -248,7 +248,7 @@ than sorting `a` entirely. print*, array(kth_smallest) ! print 5 k = 6 - ! Due to the previous two calls to arg_select, we know for sure this is in + ! Due to the previous two calls to arg_select, we know for sure this is in ! an index >= 2 and <= 7 call arg_select(array, indx, k, kth_smallest, left=2, right=7) print*, array(kth_smallest) ! print 4 From d7d1a0b4d163c4f09a28fbc0569f3c7102d4926b Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 01:29:38 +1000 Subject: [PATCH 08/51] typo --- src/stdlib_selection.fypp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 18354814c..747afc8d0 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -183,7 +183,7 @@ contains !! and also that: !! `maxval(a(indx(1:(left-1)))) <= minval(a(indx(left:right)))` !! and: - !! `maxavl(a(indx(left:right))) <= minval(a(indx((right+1):size(a))))` + !! `maxval(a(indx(left:right))) <= minval(a(indx((right+1):size(a))))` !! then one or both bounds can be specified to reduce the search !! time. These constraints are available if we have previously !! called the subroutine with a different `k` (due to the way that From 53054439bbd1f60c9352e2c0dc4e89e9e17e3f65 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 09:11:44 +1000 Subject: [PATCH 09/51] use error stop --- src/stdlib_selection.fypp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 747afc8d0..9a29bbdd7 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -110,7 +110,7 @@ contains if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then - stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + error stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if k_local = k - l + 1 @@ -198,12 +198,12 @@ contains if(present(right)) r = right if(size(a) /= size(indx)) then - stop "arg_select must have size(a) == size(indx)" + error stop "arg_select must have size(a) == size(indx)" end if if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & r > size(a)) then - stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; + error stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if k_local = k - l + 1 From 054d7ee9d697fc1828c4a866f079da72a08665b3 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 09:16:48 +1000 Subject: [PATCH 10/51] make tests faster --- src/tests/selection/test_selection.fypp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index d2017ed4c..4b497c1f1 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -31,8 +31,8 @@ contains #:set name = rname("test_select", 1, arraytype, arraykind, intkind) subroutine ${name}$() - ${inttype}$, parameter :: N = 10, Nreps = 4, Nm = 8 - ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 100_int64) ! < HUGE(N) + ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 + ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) ${arraytype}$ :: x(N), x_copy(N), mat(Nm), mat_copy(Nm), len1(1), len2(2), & kth_smallest, random_vals(Nr), one = 1 @@ -189,8 +189,8 @@ contains #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) subroutine ${name}$ - ${inttype}$, parameter :: N = 10, Nreps = 4, Nm = 8 - ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 100_int64) ! < HUGE(N) + ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 + ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) integer, parameter :: ip = ${intkind}$ ${arraytype}$ :: x(N), mat(Nm), len1(1), len2(2), random_vals(Nr), one=1 From 5ac0033b88c6187a60e779899c9ee2d5cd5e0d6b Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 09:37:41 +1000 Subject: [PATCH 11/51] fix example programs --- doc/specs/stdlib_selection.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 3d0717471..bb23e31dd 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -139,23 +139,23 @@ than sorting `a` entirely. real :: kth_smallest integer :: k, left, right - array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] + array = [3., 2., 7., 4., 5., 1., 4., -1.] k = 2 call select(array, k, kth_smallest) - print*, kth_smallest ! print 1 + print*, kth_smallest ! print 1.0 k = 7 ! Due to the previous call to select, we know for sure this is in an ! index >= 2 call select(array, k, kth_smallest, left=2) - print*, kth_smallest ! print 5 + print*, kth_smallest ! print 5.0 k = 6 ! Due to the previous two calls to select, we know for sure this is in ! an index >= 2 and <= 7 call select(array, k, kth_smallest, left=2, right=7) - print*, kth_smallest ! print 4 + print*, kth_smallest ! print 4.0 end program demo_select ``` @@ -234,25 +234,25 @@ than sorting `a` entirely. integer :: kth_smallest integer :: k, left, right - array = 1.0 * [3, 2, 7, 4, 5, 1, 4, -1] + array = [3., 2., 7., 4., 5., 1., 4., -1.] indx = (/( k, k = 1, size(array) )/) k = 2 call arg_select(array, indx, k, kth_smallest) - print*, array(kth_smallest) ! print 1 + print*, array(kth_smallest) ! print 1.0 k = 7 ! Due to the previous call to arg_select, we know for sure this is in an ! index >= 2 call arg_select(array, indx, k, kth_smallest, left=2) - print*, array(kth_smallest) ! print 5 + print*, array(kth_smallest) ! print 5.0 k = 6 ! Due to the previous two calls to arg_select, we know for sure this is in ! an index >= 2 and <= 7 call arg_select(array, indx, k, kth_smallest, left=2, right=7) - print*, array(kth_smallest) ! print 4 + print*, array(kth_smallest) ! print 4.0 - end program demo_select + end program demo_arg_select ``` From 55ecacbdeb6bf070fb74706ed13b41e1c58040a9 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 09:42:21 +1000 Subject: [PATCH 12/51] get rid of precision conversion warnings --- src/stdlib_selection.fypp | 46 ++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 9a29bbdd7..0667dd5b5 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -102,39 +102,40 @@ contains ${inttype}$ :: l, r, s, i, j, k_local ${arraytype}$ :: pivot + integer, parameter :: ip = ${intkind}$ - l = 1_${intkind}$ + l = 1_ip if(present(left)) l = left - r = size(a) + r = size(a, kind=ip) if(present(right)) r = right - if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & - r > size(a)) then + if(k < 1_ip .or. k > size(a, kind=ip) .or. l > r .or. l < 1_ip .or. & + r > size(a, kind=ip)) then error stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - k_local = k - l + 1 + k_local = k - l + 1_ip do while(.true.) - s = (l+r)/2_${intkind}$ ! Deliberate integer division + s = (l+r)/2_ip ! Deliberate integer division if(a(s) < a(r)) call swap(a(s), a(r)) if(a(s) < a(l)) call swap(a(s), a(l)) if(a(r) < a(l)) call swap(a(r), a(l)) pivot = a(r) ! Median i = l - do j = l, r-1 + do j = l, r-1_ip if(a(j) <= pivot) then call swap(a(i), a(j)) - i = i+1 + i = i+1_ip end if end do call swap(a(r), a(i)) - s = i-l+1 + s = i-l+1_ip if(k_local < s) then - r = i-1 + r = i-1_ip else if(k_local > s) then - l=i+1; k_local=k_local-s; + l=i+1_ip; k_local=k_local-s; else kth_smallest = a(i) return @@ -191,43 +192,44 @@ contains ${inttype}$ :: l, r, s, i, j, k_local ${arraytype}$ :: pivot + integer, parameter :: ip = ${intkind}$ - l = 1_${intkind}$ + l = 1_ip if(present(left)) l = left - r = size(a) + r = size(a, kind=ip) if(present(right)) r = right if(size(a) /= size(indx)) then error stop "arg_select must have size(a) == size(indx)" end if - if(k < 1 .or. k > size(a) .or. l > r .or. l < 1 .or. & - r > size(a)) then + if(k < 1_ip .or. k > size(a, kind=ip) .or. l > r .or. l < 1_ip .or. & + r > size(a, kind=ip)) then error stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - k_local = k - l + 1 + k_local = k - l + 1_ip do while(.true.) - s = (l+r)/2_${intkind}$ ! Deliberate integer division + s = (l+r)/2_ip ! Deliberate integer division if(a(indx(s)) < a(indx(r))) call swap(indx(s), indx(r)) if(a(indx(s)) < a(indx(l))) call swap(indx(s), indx(l)) if(a(indx(r)) < a(indx(l))) call swap(indx(r), indx(l)) pivot = a(indx(r)) ! Median i = l - do j = l, r-1 + do j = l, r-1_ip if(a(indx(j)) <= pivot) then call swap(indx(i), indx(j)) - i = i+1 + i = i+1_ip end if end do call swap(indx(r), indx(i)) - s = i-l+1 + s = i-l+1_ip if(k_local < s) then - r = i-1 + r = i-1_ip else if(k_local > s) then - l=i+1; k_local=k_local-s; + l=i+1_ip; k_local=k_local-s; else kth_smallest = indx(i) return From 112844479b1ab27fe0150fa9777762ad7974a4f9 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 10:20:36 +1000 Subject: [PATCH 13/51] specs --- doc/specs/stdlib_selection.md | 119 ++++++++++++++++++++++++++++++++-- 1 file changed, 112 insertions(+), 7 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index bb23e31dd..ae793e286 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -8,14 +8,16 @@ title: Selection Procedures ## Overview of selection -Suppose you wish to find the value of the kth-smallest entry in an array, or the index -of that value. While it could be done by sorting the whole array using `sort` or `sort_index` -from `stdlib_sorting` and then finding the k-th entry, that would require O(N x LOG(N)) -time. However selection of a single entry can be done in O(N) time, so is much faster for large arrays. -This is useful, for example, to quickly find the median of an array, or some other percentile. +Suppose you wish to find the value of the kth-smallest entry in an array, or +the index of that value. While it could be done by sorting the whole array +using `sort` or `sort_index` from `stdlib_sorting` and then finding the k-th +entry, that would require O(N x LOG(N)) time. However selection of a single +entry can be done in O(N) time, so is much faster for large arrays. This is +useful, for example, to quickly find the median of an array, or some other +percentile. -The Fortran Standard Library therefore provides a module, `stdlib_selection`, which implements -selection algorithms. +The Fortran Standard Library therefore provides a module, `stdlib_selection`, +which implements selection algorithms. ## Overview of the module @@ -256,3 +258,106 @@ than sorting `a` entirely. end program demo_arg_select ``` +## Comparison with using sort + +The following program compares the timings of `select` and `arg_select` for +computing the median of an array, vs using `sort` from stdlib. In theory we +should see a speed improvement with the selection routines which grows like +LOG(size(array)). + +```fortran + program selection_vs_sort + use stdlib_kinds, only: dp, sp, int64 + use stdlib_selection, only: select, arg_select + use stdlib_sorting, only: sort + implicit none + + call compare_select_sort_for_median(1) + call compare_select_sort_for_median(11) + call compare_select_sort_for_median(101) + call compare_select_sort_for_median(1001) + call compare_select_sort_for_median(10001) + call compare_select_sort_for_median(100001) + + + contains + subroutine compare_select_sort_for_median(N) + integer, intent(in) :: N + + integer :: i, k, result_arg_select, indx(N), indx_local(N) + real :: random_vals(N), local_random_vals(N) + integer, parameter :: test_reps = 100 + integer(int64) :: t0, t1 + real :: result_sort, result_select + integer(int64) :: time_sort, time_select, time_arg_select + logical :: select_test_passed, arg_select_test_passed + + ! Ensure N is odd + if(mod(N, 2) /= 1) stop + + time_sort = 0 + time_select = 0 + time_arg_select = 0 + + select_test_passed = .true. + arg_select_test_passed = .true. + + indx = (/( i, i = 1, N) /) + + k = (N+1)/2 ! Deliberate integer division + + do i = 1, test_reps + call random_number(random_vals) + + ! Compute the median with sorting + local_random_vals = random_vals + call system_clock(t0) + call sort(local_random_vals) + result_sort = local_random_vals(k) + call system_clock(t1) + time_sort = time_sort + (t1 - t0) + + ! Compute the median with selection, assuming N is odd + local_random_vals = random_vals + call system_clock(t0) + call select(local_random_vals, k, result_select) + call system_clock(t1) + time_select = time_select + (t1 - t0) + + ! Compute the median with arg_select, assuming N is odd + local_random_vals = random_vals + indx_local = indx + call system_clock(t0) + call arg_select(local_random_vals, indx_local, k, result_arg_select) + call system_clock(t1) + time_arg_select = time_arg_select + (t1 - t0) + + if(result_select /= result_sort) select_test_passed = .FALSE. + if(local_random_vals(result_arg_select) /= result_sort) arg_select_test_passed = .FALSE. + end do + + print*, "select ; N=", N, '; ', merge('PASS', 'FAIL', select_test_passed), & + '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_select) + print*, "arg_select; N=", N, '; ', merge('PASS', 'FAIL', arg_select_test_passed), & + '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_arg_select) + + end subroutine + + end program +``` + +The results seem consistent with expectations; the program prints: +``` + select ; N= 1 ; PASS; Relative-speedup-vs-sort: 2.11456394 + arg_select; N= 1 ; PASS; Relative-speedup-vs-sort: 3.48637915 + select ; N= 11 ; PASS; Relative-speedup-vs-sort: 1.61203921 + arg_select; N= 11 ; PASS; Relative-speedup-vs-sort: 1.39727581 + select ; N= 101 ; PASS; Relative-speedup-vs-sort: 2.20215392 + arg_select; N= 101 ; PASS; Relative-speedup-vs-sort: 1.92821240 + select ; N= 1001 ; PASS; Relative-speedup-vs-sort: 4.02239370 + arg_select; N= 1001 ; PASS; Relative-speedup-vs-sort: 3.57875609 + select ; N= 10001 ; PASS; Relative-speedup-vs-sort: 4.76596451 + arg_select; N= 10001 ; PASS; Relative-speedup-vs-sort: 4.00693893 + select ; N= 100001 ; PASS; Relative-speedup-vs-sort: 5.76878738 + arg_select; N= 100001 ; PASS; Relative-speedup-vs-sort: 5.04838133 +``` From f029976b3a5ac80fd242da72a0de2253609dc8b2 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 11:00:02 +1000 Subject: [PATCH 14/51] fix doc --- doc/specs/stdlib_selection.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index ae793e286..b4ebf046a 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -204,13 +204,13 @@ and on return it contains the index of the k-th smallest entry of `a(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained in `a(indx(left:size(a)))`. If not present it is 1. This is typically useful if multiple calls -to `select` are made, because the partial sorting of `a` implies constraints on where +to `arg_select` are made, because the partial sorting of `indx` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained in `a(indx(1:right))`. If not present it is `size(a)`. This is typically useful if multiple calls -to `select` are made, because the reordering of `indx` implies constraints on +to `arg_select` are made, because the reordering of `indx` implies constraints on where we need to search. ##### Notes From 103b9e998d86bc655b888d76be757153f6f0f016 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 11:00:57 +1000 Subject: [PATCH 15/51] fix doc --- doc/specs/stdlib_selection.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index b4ebf046a..7d41eb305 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -217,7 +217,8 @@ where we need to search. `arg_select` does not modify `a`, unlike `select`. -While it is essential that that `indx` contains the integers 1:size(a), the code does not check for this. +While it is essential that that `indx` contains the integers 1:size(a) (in any +order), the code does not check for this. Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster than sorting `a` entirely. From 648a3840bdb972d13bcb10ab98d56d1b2eb858b2 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 11:02:18 +1000 Subject: [PATCH 16/51] fix doc --- doc/specs/stdlib_selection.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 7d41eb305..399468bd2 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -217,7 +217,7 @@ where we need to search. `arg_select` does not modify `a`, unlike `select`. -While it is essential that that `indx` contains the integers 1:size(a) (in any +While it is essential that that `indx` contains the integers `1:size(a)` (in any order), the code does not check for this. Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster @@ -347,7 +347,7 @@ LOG(size(array)). end program ``` -The results seem consistent with expectations; the program prints: +The results seem consistent with expectations when the array is large; the program prints: ``` select ; N= 1 ; PASS; Relative-speedup-vs-sort: 2.11456394 arg_select; N= 1 ; PASS; Relative-speedup-vs-sort: 3.48637915 From 0c2140a526972a2df175e7505cbe1b057e56e4e1 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 28 Aug 2021 11:21:30 +1000 Subject: [PATCH 17/51] fix reference to subroutine --- doc/specs/stdlib_selection.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 399468bd2..88eaff634 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -36,7 +36,7 @@ input index array with the same size as `array`, having unique indices from `1:size(array)`, which is modified instead. On return the index array is modified such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array(k+1:size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search -for the kth-smallest value. This can be useful if you have previously called `select` +for the kth-smallest value. This can be useful if you have previously called `arg_select` to find a smaller or larger rank (that will have led to partial sorting of `index`, thus implying some constraint on the location). From 797063605cd649abdc40cbd4eee21d2f774f63e4 Mon Sep 17 00:00:00 2001 From: gareth-nx <82561769+gareth-nx@users.noreply.github.com> Date: Sun, 19 Sep 2021 10:01:29 +1000 Subject: [PATCH 18/51] Apply some suggestions from code review by jvdp1 Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_selection.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 88eaff634..5ad7b8752 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -10,9 +10,9 @@ title: Selection Procedures Suppose you wish to find the value of the kth-smallest entry in an array, or the index of that value. While it could be done by sorting the whole array -using `sort` or `sort_index` from `stdlib_sorting` and then finding the k-th +using `[[stdlib_sorting(module):sort(interface)]]` or `[[stdlib_sorting(module):sort_index(interface)]]` from `[[stdlib_sorting(module)]]` and then finding the k-th entry, that would require O(N x LOG(N)) time. However selection of a single -entry can be done in O(N) time, so is much faster for large arrays. This is +entry can be done in O(N) time, which is much faster for large arrays. This is useful, for example, to quickly find the median of an array, or some other percentile. @@ -28,7 +28,7 @@ such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size( The user can optionally specify `left` and `right` indices to constrain the search for the kth-smallest value. This can be useful if you have previously called `select` to find a smaller or larger rank (that will have led to partial sorting of -`array`, thus implying some constraint on the location). +`array`, thus implying some constraints on the location). * `arg_select` is used to find the index of the kth-smallest entry of an array. In this case the input array is not modified, but the user must provide an @@ -38,7 +38,7 @@ such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array The user can optionally specify `left` and `right` indices to constrain the search for the kth-smallest value. This can be useful if you have previously called `arg_select` to find a smaller or larger rank (that will have led to partial sorting of -`index`, thus implying some constraint on the location). +`index`, thus implying some constraints on the location). #### Licensing @@ -47,7 +47,7 @@ License. However components of the library may be based on code with additional licensing restrictions. In particular `select` and `arg_select` were derived by modifying a matlab implementation of "qselect" by Manolis Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect -Below is the licence of the matlab qselect +Below is the license of the matlab qselect Copyright (c) 2018, Manolis Lourakis All rights reserved. @@ -238,7 +238,7 @@ than sorting `a` entirely. integer :: k, left, right array = [3., 2., 7., 4., 5., 1., 4., -1.] - indx = (/( k, k = 1, size(array) )/) + indx = [( k, k = 1, size(array) )] k = 2 call arg_select(array, indx, k, kth_smallest) @@ -259,7 +259,7 @@ than sorting `a` entirely. end program demo_arg_select ``` -## Comparison with using sort +## Comparison with using `sort` The following program compares the timings of `select` and `arg_select` for computing the median of an array, vs using `sort` from stdlib. In theory we From 487c97390be2884ad3b142fdbb719b3c128bea25 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 19 Sep 2021 10:12:20 +1000 Subject: [PATCH 19/51] change "a" to "array" in documentation as suggested by jvdp1 --- doc/specs/stdlib_selection.md | 77 ++++++++++++++++++----------------- 1 file changed, 40 insertions(+), 37 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 5ad7b8752..e50dbb037 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -10,11 +10,12 @@ title: Selection Procedures Suppose you wish to find the value of the kth-smallest entry in an array, or the index of that value. While it could be done by sorting the whole array -using `[[stdlib_sorting(module):sort(interface)]]` or `[[stdlib_sorting(module):sort_index(interface)]]` from `[[stdlib_sorting(module)]]` and then finding the k-th -entry, that would require O(N x LOG(N)) time. However selection of a single -entry can be done in O(N) time, which is much faster for large arrays. This is -useful, for example, to quickly find the median of an array, or some other -percentile. +using `[[stdlib_sorting(module):sort(interface)]]` or +`[[stdlib_sorting(module):sort_index(interface)]]` from +`[[stdlib_sorting(module)]]` and then finding the k-th entry, that would +require O(N x LOG(N)) time. However selection of a single entry can be done in +O(N) time, which is much faster for large arrays. This is useful, for example, +to quickly find the median of an array, or some other percentile. The Fortran Standard Library therefore provides a module, `stdlib_selection`, which implements selection algorithms. @@ -86,12 +87,12 @@ Experimental ##### Description -Returns the k-th smallest value of array `a(:)`, and also partially sorts `a(:)` -such that `all(a(1:k) <= a(k))` and `all(a(k) <= a((k+1):size(a)))` +Returns the k-th smallest value of `array(:)`, and also partially sorts `array(:)` +such that `all(array(1:k) <= array(k))` and `all(array(k) <= array((k+1):size(array)))` ##### Syntax -`call [[stdlib_selection(module):select(interface)]]( a, k, kth_smallest [, left, right ] )` +`call [[stdlib_selection(module):select(interface)]]( array, k, kth_smallest [, left, right ] )` ##### Class @@ -99,36 +100,37 @@ Generic subroutine. ##### Arguments -`a` : shall be a rank one array of any of the types: +`array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. On input it is the array in which we search for the kth smallest entry. On return its elements will be partially sorted such that: - `all( a(1:k-1) <= a(k) )` and `all(a(k) <= a(k+1:size(a)))`. + `all( array(1:k-1) <= array(k) )` and `all(array(k) <= array(k+1:size(array)))`. `k`: shall be a scalar with any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It -is an `intent(in)` argument. We search for the `k`-th smallest entry of `a(:)`. +is an `intent(in)` argument. We search for the `k`-th smallest entry of `array(:)`. -`kth_smallest`: shall be a scalar with the same type as `a`. On return it contains -the k-th smallest entry of `a(:)`. +`kth_smallest`: shall be a scalar with the same type as `array`. On return it contains +the k-th smallest entry of `array(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `a(left:size(a))`. If not present it is 1. This is typically useful if multiple calls -to `select` are made, because the partial sorting of `a` implies constraints on where +in `array(left:size(array))`. If not present it is 1. This is typically useful if multiple calls +to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `a(1:right)`. If not present it is `size(a)`. This is typically useful if multiple calls -to `select` are made, because the partial sorting of `a` implies constraints on where +in `array(1:right)`. If not present it is `size(a)`. This is typically useful if multiple calls +to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. ##### Notes -Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster -than sorting `a` entirely. +Selection of a single value should have runtime of O(`size(array)`), so it is +asymptotically faster than sorting `array` entirely. The test program at the the +end of this document shows that is the case. ##### Example @@ -170,13 +172,13 @@ Experimental ##### Description -Returns the index of the k-th smallest value of array `a(:)`, and also partially sorts -the index-array `indx(:)` such that `all(a(indx(1:k)) <= a(indx(k)))` and -`all(a(indx(k)) <= a(indx((k+1):size(a))))` +Returns the index of the k-th smallest value of `array(:)`, and also partially sorts +the index-array `indx(:)` such that `all(array(indx(1:k)) <= array(indx(k)))` and +`all(array(indx(k)) <= array(indx((k+1):size(array))))` ##### Syntax -`call [[stdlib_selection(module):arg_select(interface)]]( a, indx, k, kth_smallest [, left, right ] )` +`call [[stdlib_selection(module):arg_select(interface)]]( array, indx, k, kth_smallest [, left, right ] )` ##### Class @@ -184,44 +186,45 @@ Generic subroutine. ##### Arguments -`a` : shall be a rank one array of any of the types: +`array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(in)` argument. On input it is the array in which we search for the kth smallest entry. -`indx`: shall be a rank one array with the same size as `a`, containing integers +`indx`: shall be a rank one array with the same size as `array`, containing all integers from `1:size(a)` in any order. It is of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It is an -`intent(inout)` argument. On return its elements will define a partial sorting of `a(:)` such that: - `all( a(indx(1:k-1)) <= a(indx(k)) )` and `all(a(indx(k)) <= a(indx(k+1:size(a))))`. +`intent(inout)` argument. On return its elements will define a partial sorting of `array(:)` such that: + `all( array(indx(1:k-1)) <= array(indx(k)) )` and `all(array(indx(k)) <= array(indx(k+1:size(array))))`. `k`: shall be a scalar with the same type as `indx`. It is an `intent(in)` -argument. We search for the `k`-th smallest entry of `a(:)`. +argument. We search for the `k`-th smallest entry of `array(:)`. `kth_smallest`: a scalar with the same type as `indx`. It is an `intent(out)` argument, -and on return it contains the index of the k-th smallest entry of `a(:)`. +and on return it contains the index of the k-th smallest entry of `array(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `a(indx(left:size(a)))`. If not present it is 1. This is typically useful if multiple calls +in `array(indx(left:size(array)))`. If not present it is 1. This is typically useful if multiple calls to `arg_select` are made, because the partial sorting of `indx` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `a(indx(1:right))`. If not present it is `size(a)`. This is typically useful if multiple calls +in `array(indx(1:right))`. If not present it is `size(array)`. This is typically useful if multiple calls to `arg_select` are made, because the reordering of `indx` implies constraints on where we need to search. ##### Notes -`arg_select` does not modify `a`, unlike `select`. +`arg_select` does not modify `array`, unlike `select`. -While it is essential that that `indx` contains the integers `1:size(a)` (in any +While it is essential that that `indx` contains the integers `1:size(array)` (in any order), the code does not check for this. -Selection of a single value should have runtime of O(`size(a)`), so it is asymptotically faster -than sorting `a` entirely. +Selection of a single value should have runtime of O(`size(array)`), so it is +asymptotically faster than sorting `array` entirely. The test program at the end of +these documents confirms that is the case. ##### Example @@ -264,7 +267,7 @@ than sorting `a` entirely. The following program compares the timings of `select` and `arg_select` for computing the median of an array, vs using `sort` from stdlib. In theory we should see a speed improvement with the selection routines which grows like -LOG(size(array)). +LOG(size(`array`)). ```fortran program selection_vs_sort @@ -347,7 +350,7 @@ LOG(size(array)). end program ``` -The results seem consistent with expectations when the array is large; the program prints: +The results seem consistent with expectations when the `array` is large; the program prints: ``` select ; N= 1 ; PASS; Relative-speedup-vs-sort: 2.11456394 arg_select; N= 1 ; PASS; Relative-speedup-vs-sort: 3.48637915 From 2ffc60b513ceb5cfd74fe18c9bfe3379bebff013 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 19 Sep 2021 10:20:24 +1000 Subject: [PATCH 20/51] fix some remaining references to "a" by converting to "array" --- doc/specs/stdlib_selection.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index e50dbb037..285a11158 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -8,7 +8,7 @@ title: Selection Procedures ## Overview of selection -Suppose you wish to find the value of the kth-smallest entry in an array, or +Suppose you wish to find the value of the kth-smallest entry in an array of size N, or the index of that value. While it could be done by sorting the whole array using `[[stdlib_sorting(module):sort(interface)]]` or `[[stdlib_sorting(module):sort_index(interface)]]` from @@ -122,7 +122,7 @@ we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(1:right)`. If not present it is `size(a)`. This is typically useful if multiple calls +in `array(1:right)`. If not present it is `size(array)`. This is typically useful if multiple calls to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. @@ -192,7 +192,7 @@ Generic subroutine. the array in which we search for the kth smallest entry. `indx`: shall be a rank one array with the same size as `array`, containing all integers -from `1:size(a)` in any order. It is of any of the types: +from `1:size(array)` in any order. It is of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It is an `intent(inout)` argument. On return its elements will define a partial sorting of `array(:)` such that: `all( array(indx(1:k-1)) <= array(indx(k)) )` and `all(array(indx(k)) <= array(indx(k+1:size(array))))`. From 3fca4ee1cc9b27668cafee622ea4dfcc5e62ba69 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 19 Sep 2021 10:24:04 +1000 Subject: [PATCH 21/51] clearer statement of partial sorting in code doc --- src/stdlib_selection.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 0667dd5b5..9f8b7917b 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -81,7 +81,7 @@ contains ${arraytype}$, intent(inout) :: a(:) !! Array in which we seek the kth-smallest entry. !! On output it will be partially sorted such that - !! `maxval(a(1:(k-1))) <= a(k) <= minval(a((k+1):size(a))).` + !! `all(a(1:(k-1)) <= a(k)) .and. all(a(k) <= a((k+1):size(a)))`. ${inttype}$, intent(in) :: k !! We want the kth smallest entry. E.G. `k=1` leads to !! `kth_smallest=min(a)`, and `k=size(a)` leads to @@ -169,7 +169,7 @@ contains ${inttype}$, intent(inout) :: indx(:) !! Array of indices into `a(:)`. Must contain each integer !! from `1:size(a)` exactly once. On output it will be partially - !! "sorted" such that + !! sorted such that !! `all( a(indx(1:(k-1)))) <= a(indx(k)) ) .AND. !! all( a(indx(k)) <= a(indx( (k+1):size(a) )) )`. ${inttype}$, intent(in) :: k From 3195ae7a804a2f974c5496d69fda05a2190d6e16 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 09:11:20 +1100 Subject: [PATCH 22/51] move this part of the documentation as suggested by jvdp1 --- doc/specs/stdlib_selection.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 285a11158..5a0e1d408 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -102,10 +102,7 @@ Generic subroutine. `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, -`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. On input it is -the array in which we search for the kth smallest entry. On return its elements -will be partially sorted such that: - `all( array(1:k-1) <= array(k) )` and `all(array(k) <= array(k+1:size(array)))`. +`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. `k`: shall be a scalar with any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It @@ -132,6 +129,9 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the the end of this document shows that is the case. +On return the elements of `array` will be partially sorted such that: +`all( array(1:k-1) <= array(k) )` and `all(array(k) <= array(k+1:size(array)))`. + ##### Example ```fortran From 0c0e0207652605cc6282bc3b22e31f1b9f19ec0a Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 10:51:47 +1100 Subject: [PATCH 23/51] move to coretran-style implementation to prevent any licence issues --- doc/specs/stdlib_selection.md | 54 ++++--- src/stdlib_selection.fypp | 202 +++++++++++++++--------- src/tests/selection/test_selection.fypp | 8 +- 3 files changed, 154 insertions(+), 110 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 5a0e1d408..554f93234 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -46,29 +46,32 @@ to find a smaller or larger rank (that will have led to partial sorting of The Fortran Standard Library is distributed under the MIT License. However components of the library may be based on code with additional licensing restrictions. In particular `select` and `arg_select` -were derived by modifying a matlab implementation of "qselect" by Manolis -Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect -Below is the license of the matlab qselect +were derived using some code from quickSelect in the Coretran library, by Leon Folks, +https://github.com/leonfoks/coretran. Coretran is released under the following BSD-3 licence. -Copyright (c) 2018, Manolis Lourakis +BSD 3-Clause License + +Copyright (c) 2019, Leon Foks All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution -* Neither the name of Foundation for Research and Technology - Hellas nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER @@ -76,7 +79,6 @@ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ### Specifications of the `stdlib_selection` procedures #### `select` - find the kth smallest value in an input array @@ -352,16 +354,16 @@ LOG(size(`array`)). The results seem consistent with expectations when the `array` is large; the program prints: ``` - select ; N= 1 ; PASS; Relative-speedup-vs-sort: 2.11456394 - arg_select; N= 1 ; PASS; Relative-speedup-vs-sort: 3.48637915 - select ; N= 11 ; PASS; Relative-speedup-vs-sort: 1.61203921 - arg_select; N= 11 ; PASS; Relative-speedup-vs-sort: 1.39727581 - select ; N= 101 ; PASS; Relative-speedup-vs-sort: 2.20215392 - arg_select; N= 101 ; PASS; Relative-speedup-vs-sort: 1.92821240 - select ; N= 1001 ; PASS; Relative-speedup-vs-sort: 4.02239370 - arg_select; N= 1001 ; PASS; Relative-speedup-vs-sort: 3.57875609 - select ; N= 10001 ; PASS; Relative-speedup-vs-sort: 4.76596451 - arg_select; N= 10001 ; PASS; Relative-speedup-vs-sort: 4.00693893 - select ; N= 100001 ; PASS; Relative-speedup-vs-sort: 5.76878738 - arg_select; N= 100001 ; PASS; Relative-speedup-vs-sort: 5.04838133 + select ; N= 1 ; PASS; Relative-speedup-vs-sort: 1.90928173 + arg_select; N= 1 ; PASS; Relative-speedup-vs-sort: 1.76875830 + select ; N= 11 ; PASS; Relative-speedup-vs-sort: 1.14835048 + arg_select; N= 11 ; PASS; Relative-speedup-vs-sort: 1.00794709 + select ; N= 101 ; PASS; Relative-speedup-vs-sort: 2.31012774 + arg_select; N= 101 ; PASS; Relative-speedup-vs-sort: 1.92877376 + select ; N= 1001 ; PASS; Relative-speedup-vs-sort: 4.24190664 + arg_select; N= 1001 ; PASS; Relative-speedup-vs-sort: 3.54580402 + select ; N= 10001 ; PASS; Relative-speedup-vs-sort: 5.61573362 + arg_select; N= 10001 ; PASS; Relative-speedup-vs-sort: 4.79348087 + select ; N= 100001 ; PASS; Relative-speedup-vs-sort: 7.28823519 + arg_select; N= 100001 ; PASS; Relative-speedup-vs-sort: 6.03007460 ``` diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 9f8b7917b..b9c98c034 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -5,36 +5,39 @@ module stdlib_selection ! -! This code was modified from a matlab implementation of "qselect" by Manolis -! Lourakis, https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect -! Below is the licence of qselect +! This code was modified from the "Coretran" implementation "quickSelect" by +! Leon Folks, https://github.com/leonfoks/coretran/tree/master/src/sorting +! Below is the licence of "Coretran" ! -! Copyright (c) 2018, Manolis Lourakis +! BSD 3-Clause License +! +! Copyright (c) 2019, Leon Foks ! All rights reserved. -! +! ! Redistribution and use in source and binary forms, with or without ! modification, are permitted provided that the following conditions are met: -! -! * Redistributions of source code must retain the above copyright notice, this -! list of conditions and the following disclaimer. -! -! * Redistributions in binary form must reproduce the above copyright notice, -! this list of conditions and the following disclaimer in the documentation -! and/or other materials provided with the distribution -! * Neither the name of Foundation for Research and Technology - Hellas nor the -! names of its contributors may be used to endorse or promote products -! derived from this software without specific prior written permission. +! +! 1. Redistributions of source code must retain the above copyright notice, this +! list of conditions and the following disclaimer. +! +! 2. Redistributions in binary form must reproduce the above copyright notice, +! this list of conditions and the following disclaimer in the documentation +! and/or other materials provided with the distribution. +! +! 3. Neither the name of the copyright holder nor the names of its +! contributors may be used to endorse or promote products derived from +! this software without specific prior written permission. +! ! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" ! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE ! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE ! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR ! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER ! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, ! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE ! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -! use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp @@ -70,13 +73,8 @@ contains subroutine ${name}$(a, k, kth_smallest, left, right) !! select - select the k-th smallest entry in a(:). !! - !! Implements Hoares Quickselect algorithm - !! (https://en.wikipedia.org/wiki/Quickselect) - !! with the median-of-3 pivot strategy. - !! Operates in-place, avoiding sorting and recursion. - !! - !! Modified from a translation of "qselect" by Manolis Lourakis - !! https://www.mathworks.com/matlabcentral/fileexchange/68947-qselect + !! Partly derived from the "Coretran" implementation of + !! quickSelect by Leon Folks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(inout) :: a(:) !! Array in which we seek the kth-smallest entry. @@ -100,7 +98,7 @@ contains !! subroutine with different `k` (because of how `a(:)` becomes !! partially sorted, see documentation for `a(:)`). - ${inttype}$ :: l, r, s, i, j, k_local + ${inttype}$ :: l, r, mid, iPivot ${arraytype}$ :: pivot integer, parameter :: ip = ${intkind}$ @@ -114,31 +112,20 @@ contains error stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - k_local = k - l + 1_ip - do while(.true.) - s = (l+r)/2_ip ! Deliberate integer division - if(a(s) < a(r)) call swap(a(s), a(r)) - if(a(s) < a(l)) call swap(a(s), a(l)) - if(a(r) < a(l)) call swap(a(r), a(l)) - pivot = a(r) ! Median - - i = l - do j = l, r-1_ip - if(a(j) <= pivot) then - call swap(a(i), a(j)) - i = i+1_ip - end if - end do - call swap(a(r), a(i)) - s = i-l+1_ip - if(k_local < s) then - r = i-1_ip - else if(k_local > s) then - l=i+1_ip; k_local=k_local-s; - else - kth_smallest = a(i) - return + mid = (l+r)/2_ip ! Deliberate integer division + + call medianOf3(a, l, mid, r) + call swap(a(l), a(mid)) + call partition(a, l, r, iPivot) + + if (iPivot < k) then + l = iPivot + 1 + elseif (iPivot > k) then + r = iPivot - 1 + elseif (iPivot == k) then + kth_smallest = a(k) + return end if end do @@ -148,6 +135,41 @@ contains ${arraytype}$ :: tmp tmp = a; a = b; b = tmp end subroutine + + subroutine medianOf3(a, left, mid, right) + ${arraytype}$, intent(inout) :: a(:) + ${inttype}$, intent(in) :: left, mid, right + if(a(right) < a(left)) call swap(a(right), a(left)) + if(a(mid) < a(left)) call swap(a(mid) , a(left)) + if(a(right) < a(mid) ) call swap(a(mid) , a(right)) + end subroutine + + subroutine partition(array,left,right,iPivot) + ${arraytype}$, intent(inout) :: array(:) + ${inttype}$, intent(in) :: left, right + ${inttype}$, intent(out) :: iPivot + ${inttype}$ :: lo,hi + ${arraytype}$ :: pivot + + pivot = array(left) + lo = left + hi=right + do while (lo <= hi) + do while (array(hi) > pivot) + hi=hi-1 + end do + do while (lo <= hi .and. array(lo) <= pivot) + lo=lo+1 + end do + if (lo <= hi) then + call swap(array(lo),array(hi)) + lo=lo+1 + hi=hi-1 + end if + end do + call swap(array(left),array(hi)) + iPivot=hi + end subroutine end subroutine #:endfor #:endfor @@ -159,10 +181,8 @@ contains subroutine ${name}$(a, indx, k, kth_smallest, left, right) !! arg_select - find the index of the k-th smallest entry in `a(:)` !! - !! Implements Hoares Quickselect algorithm - !! https://en.wikipedia.org/wiki/Quickselect) - !! with the median-of-3 pivot strategy. - !! Operates in-place, avoiding sorting and recursion. + !! Partly derived from the "Coretran" implementation of + !! quickSelect by Leon Folks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(in) :: a(:) !! Array in which we seek the kth-smallest entry. @@ -190,7 +210,7 @@ contains !! called the subroutine with a different `k` (due to the way that !! `indx(:)` becomes partially sorted, see documentation for `indx(:)`). - ${inttype}$ :: l, r, s, i, j, k_local + ${inttype}$ :: l, r, mid, iPivot ${arraytype}$ :: pivot integer, parameter :: ip = ${intkind}$ @@ -208,31 +228,20 @@ contains error stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - k_local = k - l + 1_ip - do while(.true.) - s = (l+r)/2_ip ! Deliberate integer division - if(a(indx(s)) < a(indx(r))) call swap(indx(s), indx(r)) - if(a(indx(s)) < a(indx(l))) call swap(indx(s), indx(l)) - if(a(indx(r)) < a(indx(l))) call swap(indx(r), indx(l)) - pivot = a(indx(r)) ! Median - - i = l - do j = l, r-1_ip - if(a(indx(j)) <= pivot) then - call swap(indx(i), indx(j)) - i = i+1_ip - end if - end do - call swap(indx(r), indx(i)) - s = i-l+1_ip - if(k_local < s) then - r = i-1_ip - else if(k_local > s) then - l=i+1_ip; k_local=k_local-s; - else - kth_smallest = indx(i) - return + mid = (l+r)/2_ip ! Deliberate integer division + + call arg_medianOf3(a, indx, l, mid, r) + call swap(indx(l), indx(mid)) + call arg_partition(a, indx, l, r, iPivot) + + if (iPivot < k) then + l = iPivot + 1 + elseif (iPivot > k) then + r = iPivot - 1 + elseif (iPivot == k) then + kth_smallest = indx(k) + return end if end do @@ -242,6 +251,43 @@ contains ${inttype}$ :: tmp tmp = a; a = b; b = tmp end subroutine + + subroutine arg_medianOf3(a, indx, left, mid, right) + ${arraytype}$, intent(in) :: a(:) + ${inttype}$, intent(inout) :: indx(:) + ${inttype}$, intent(in) :: left, mid, right + if(a(indx(right)) < a(indx(left))) call swap(indx(right), indx(left)) + if(a(indx(mid)) < a(indx(left))) call swap(indx(mid) , indx(left)) + if(a(indx(right)) < a(indx(mid)) ) call swap(indx(mid) , indx(right)) + end subroutine + + subroutine arg_partition(array, indx, left,right,iPivot) + ${arraytype}$, intent(in) :: array(:) + ${inttype}$, intent(inout) :: indx(:) + ${inttype}$, intent(in) :: left, right + ${inttype}$, intent(out) :: iPivot + ${inttype}$ :: lo,hi + ${arraytype}$ :: pivot + + pivot = array(indx(left)) + lo = left + hi = right + do while (lo <= hi) + do while (array(indx(hi)) > pivot) + hi=hi-1 + end do + do while (lo <= hi .and. array(indx(lo)) <= pivot) + lo=lo+1 + end do + if (lo <= hi) then + call swap(indx(lo),indx(hi)) + lo=lo+1 + hi=hi-1 + end if + end do + call swap(indx(left),indx(hi)) + iPivot=hi + end subroutine end subroutine #:endfor #:endfor diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index 4b497c1f1..909ed7901 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -69,9 +69,7 @@ contains call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with right specified") end do - ! - ! Variants of test that came with the matlab documentation - ! + ! Simple tests mat = one * [3, 2, 7, 4, 5, 1, 4, -1] mat_copy = mat call select(mat_copy, 1_ip, kth_smallest) @@ -231,9 +229,7 @@ contains call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with right specified") end do - ! - ! Variants of test that came with the matlab documentation for qselect - ! + ! Simple tests mat = one * [3, 2, 7, 4, 5, 1, 4, -1] indx_mat = (/( i, i = 1, size(mat, kind=ip) )/) From 1b0f5db056cd6d6c81aa68c36fd2e5664c035e45 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 11:20:29 +1100 Subject: [PATCH 24/51] remove some warnings --- src/stdlib_selection.fypp | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index b9c98c034..4b048f2a6 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -99,7 +99,6 @@ contains !! partially sorted, see documentation for `a(:)`). ${inttype}$ :: l, r, mid, iPivot - ${arraytype}$ :: pivot integer, parameter :: ip = ${intkind}$ l = 1_ip @@ -120,9 +119,9 @@ contains call partition(a, l, r, iPivot) if (iPivot < k) then - l = iPivot + 1 + l = iPivot + 1_ip elseif (iPivot > k) then - r = iPivot - 1 + r = iPivot - 1_ip elseif (iPivot == k) then kth_smallest = a(k) return @@ -156,15 +155,15 @@ contains hi=right do while (lo <= hi) do while (array(hi) > pivot) - hi=hi-1 + hi=hi-1_ip end do do while (lo <= hi .and. array(lo) <= pivot) - lo=lo+1 + lo=lo+1_ip end do if (lo <= hi) then call swap(array(lo),array(hi)) - lo=lo+1 - hi=hi-1 + lo=lo+1_ip + hi=hi-1_ip end if end do call swap(array(left),array(hi)) @@ -236,9 +235,9 @@ contains call arg_partition(a, indx, l, r, iPivot) if (iPivot < k) then - l = iPivot + 1 + l = iPivot + 1_ip elseif (iPivot > k) then - r = iPivot - 1 + r = iPivot - 1_ip elseif (iPivot == k) then kth_smallest = indx(k) return @@ -274,15 +273,15 @@ contains hi = right do while (lo <= hi) do while (array(indx(hi)) > pivot) - hi=hi-1 + hi=hi-1_ip end do do while (lo <= hi .and. array(indx(lo)) <= pivot) - lo=lo+1 + lo=lo+1_ip end do if (lo <= hi) then call swap(indx(lo),indx(hi)) - lo=lo+1 - hi=hi-1 + lo=lo+1_ip + hi=hi-1_ip end if end do call swap(indx(left),indx(hi)) From af025d18c0a296f5d62b4fcc912d60b724e0dd4b Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 12:20:15 +1100 Subject: [PATCH 25/51] fix for out of bounds error --- src/stdlib_selection.fypp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 4b048f2a6..bbb42acea 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -157,7 +157,8 @@ contains do while (array(hi) > pivot) hi=hi-1_ip end do - do while (lo <= hi .and. array(lo) <= pivot) + do while (lo <= hi ) + if(array(lo) > pivot) exit lo=lo+1_ip end do if (lo <= hi) then @@ -275,7 +276,8 @@ contains do while (array(indx(hi)) > pivot) hi=hi-1_ip end do - do while (lo <= hi .and. array(indx(lo)) <= pivot) + do while (lo <= hi ) + if(array(indx(lo)) > pivot) exit lo=lo+1_ip end do if (lo <= hi) then From 943a111140a1e9e867aa3eadbe11b53ec568f3bf Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 14:57:56 +1100 Subject: [PATCH 26/51] Changes suggested by Leon Folks --- doc/specs/stdlib_selection.md | 35 +++---------------------------- src/stdlib_selection.fypp | 39 ++++++----------------------------- 2 files changed, 9 insertions(+), 65 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 554f93234..3a5eae1c7 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -46,38 +46,9 @@ to find a smaller or larger rank (that will have led to partial sorting of The Fortran Standard Library is distributed under the MIT License. However components of the library may be based on code with additional licensing restrictions. In particular `select` and `arg_select` -were derived using some code from quickSelect in the Coretran library, by Leon Folks, -https://github.com/leonfoks/coretran. Coretran is released under the following BSD-3 licence. - -BSD 3-Clause License - -Copyright (c) 2019, Leon Foks -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +were derived using some code from quickSelect in the Coretran library, by Leon Foks, +https://github.com/leonfoks/coretran. Leon has given permission for the code here +to be released under stdlib's MIT licence. ### Specifications of the `stdlib_selection` procedures diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index bbb42acea..c049ba488 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -6,38 +6,11 @@ module stdlib_selection ! ! This code was modified from the "Coretran" implementation "quickSelect" by -! Leon Folks, https://github.com/leonfoks/coretran/tree/master/src/sorting -! Below is the licence of "Coretran" +! Leon Foks, https://github.com/leonfoks/coretran/tree/master/src/sorting +! +! Leon gave permission for this to be released under stdlib's MIT licence. +! (https://github.com/fortran-lang/stdlib/pull/500#commitcomment-57418593) ! -! BSD 3-Clause License -! -! Copyright (c) 2019, Leon Foks -! All rights reserved. -! -! Redistribution and use in source and binary forms, with or without -! modification, are permitted provided that the following conditions are met: -! -! 1. Redistributions of source code must retain the above copyright notice, this -! list of conditions and the following disclaimer. -! -! 2. Redistributions in binary form must reproduce the above copyright notice, -! this list of conditions and the following disclaimer in the documentation -! and/or other materials provided with the distribution. -! -! 3. Neither the name of the copyright holder nor the names of its -! contributors may be used to endorse or promote products derived from -! this software without specific prior written permission. -! -! THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -! AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -! IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -! DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -! FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -! DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -! SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -! CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -! OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -! OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp @@ -74,7 +47,7 @@ contains !! select - select the k-th smallest entry in a(:). !! !! Partly derived from the "Coretran" implementation of - !! quickSelect by Leon Folks, https://github.com/leonfoks/coretran + !! quickSelect by Leon Foks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(inout) :: a(:) !! Array in which we seek the kth-smallest entry. @@ -182,7 +155,7 @@ contains !! arg_select - find the index of the k-th smallest entry in `a(:)` !! !! Partly derived from the "Coretran" implementation of - !! quickSelect by Leon Folks, https://github.com/leonfoks/coretran + !! quickSelect by Leon Foks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(in) :: a(:) !! Array in which we seek the kth-smallest entry. From 8cc6129bd4fe8d200ba7e402155f7904a3c4ec39 Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 3 Oct 2021 15:02:47 +1100 Subject: [PATCH 27/51] fix unused variable --- src/stdlib_selection.fypp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index c049ba488..a5c4ee2e9 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -184,7 +184,6 @@ contains !! `indx(:)` becomes partially sorted, see documentation for `indx(:)`). ${inttype}$ :: l, r, mid, iPivot - ${arraytype}$ :: pivot integer, parameter :: ip = ${intkind}$ l = 1_ip From 73010e23cd11cce0f74706aa916211fd4fb1bc69 Mon Sep 17 00:00:00 2001 From: gareth-nx <82561769+gareth-nx@users.noreply.github.com> Date: Mon, 4 Oct 2021 09:45:39 +1100 Subject: [PATCH 28/51] Apply suggestions from code review First batch of suggestions from jvdp1 Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_selection.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 3a5eae1c7..7e7770050 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -8,7 +8,7 @@ title: Selection Procedures ## Overview of selection -Suppose you wish to find the value of the kth-smallest entry in an array of size N, or +Suppose you wish to find the value of the k-th smallest entry in an array of size N, or the index of that value. While it could be done by sorting the whole array using `[[stdlib_sorting(module):sort(interface)]]` or `[[stdlib_sorting(module):sort_index(interface)]]` from @@ -31,23 +31,22 @@ for the kth-smallest value. This can be useful if you have previously called `se to find a smaller or larger rank (that will have led to partial sorting of `array`, thus implying some constraints on the location). -* `arg_select` is used to find the index of the kth-smallest entry of an array. +* `arg_select` is used to find the index of the k-th smallest entry of an array. In this case the input array is not modified, but the user must provide an input index array with the same size as `array`, having unique indices from `1:size(array)`, which is modified instead. On return the index array is modified such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array(k+1:size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search -for the kth-smallest value. This can be useful if you have previously called `arg_select` +for the k-th smallest value. This can be useful if you have previously called `arg_select` to find a smaller or larger rank (that will have led to partial sorting of `index`, thus implying some constraints on the location). #### Licensing The Fortran Standard Library is distributed under the MIT -License. However components of the library may be based on code with -additional licensing restrictions. In particular `select` and `arg_select` +License. It is worth noting that `select` and `arg_select` were derived using some code from quickSelect in the Coretran library, by Leon Foks, -https://github.com/leonfoks/coretran. Leon has given permission for the code here +https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here to be released under stdlib's MIT licence. ### Specifications of the `stdlib_selection` procedures From ddec12c932570a70b815139ef442d601e858a2a7 Mon Sep 17 00:00:00 2001 From: gareth-nx <82561769+gareth-nx@users.noreply.github.com> Date: Mon, 4 Oct 2021 09:57:34 +1100 Subject: [PATCH 29/51] Apply suggestions from code review by jvdp1 Co-authored-by: Jeremie Vandenplas --- doc/specs/stdlib_selection.md | 16 ++++++++-------- src/stdlib_selection.fypp | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 7e7770050..c27e195e6 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -47,11 +47,11 @@ The Fortran Standard Library is distributed under the MIT License. It is worth noting that `select` and `arg_select` were derived using some code from quickSelect in the Coretran library, by Leon Foks, https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here -to be released under stdlib's MIT licence. +to be released under stdlib's MIT license. ### Specifications of the `stdlib_selection` procedures -#### `select` - find the kth smallest value in an input array +#### `select` - find the k-th smallest value in an input array ##### Status @@ -85,20 +85,20 @@ the k-th smallest entry of `array(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(left:size(array))`. If not present it is 1. This is typically useful if multiple calls +in `array(left:size(array))`. If `left` is not present, the default is 1. This is typically useful if multiple calls to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(1:right)`. If not present it is `size(array)`. This is typically useful if multiple calls +in `array(1:right)`. If `right` is not present, the default is `size(array)`. This is typically useful if multiple calls to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. ##### Notes Selection of a single value should have runtime of O(`size(array)`), so it is -asymptotically faster than sorting `array` entirely. The test program at the the +asymptotically faster than sorting `array` entirely. The test program at the end of this document shows that is the case. On return the elements of `array` will be partially sorted such that: @@ -177,13 +177,13 @@ and on return it contains the index of the k-th smallest entry of `array(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(indx(left:size(array)))`. If not present it is 1. This is typically useful if multiple calls +in `array(indx(left:size(array)))`. If `left` is not present, the default is 1. This is typically useful if multiple calls to `arg_select` are made, because the partial sorting of `indx` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(indx(1:right))`. If not present it is `size(array)`. This is typically useful if multiple calls +in `array(indx(1:right))`. If `right` is not present, the default is `size(array)`. This is typically useful if multiple calls to `arg_select` are made, because the reordering of `indx` implies constraints on where we need to search. @@ -237,7 +237,7 @@ these documents confirms that is the case. ## Comparison with using `sort` The following program compares the timings of `select` and `arg_select` for -computing the median of an array, vs using `sort` from stdlib. In theory we +computing the median of an array, vs using `sort` from `stdlib`. In theory we should see a speed improvement with the selection routines which grows like LOG(size(`array`)). diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index a5c4ee2e9..a9d505cc4 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -8,7 +8,7 @@ module stdlib_selection ! This code was modified from the "Coretran" implementation "quickSelect" by ! Leon Foks, https://github.com/leonfoks/coretran/tree/master/src/sorting ! -! Leon gave permission for this to be released under stdlib's MIT licence. +! Leon Foks gave permission to be release this code under stdlib's MIT license. ! (https://github.com/fortran-lang/stdlib/pull/500#commitcomment-57418593) ! From 1ad67259da462b43bf91ba901e7847104f5c5f63 Mon Sep 17 00:00:00 2001 From: gareth Date: Mon, 4 Oct 2021 10:40:47 +1100 Subject: [PATCH 30/51] Further changes suggested by jvdp1 --- doc/specs/stdlib_selection.md | 47 ++++++++++++++++------------------- src/stdlib_selection.fypp | 5 ++++ 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index c27e195e6..eab827792 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -33,7 +33,7 @@ to find a smaller or larger rank (that will have led to partial sorting of * `arg_select` is used to find the index of the k-th smallest entry of an array. In this case the input array is not modified, but the user must provide an -input index array with the same size as `array`, having unique indices from +input index array with the same size as `array`, having indices that are a permutation of `1:size(array)`, which is modified instead. On return the index array is modified such that `all(array(index(1:k)) <= array(index(k)))` and `all(array(k) <= array(k+1:size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search @@ -41,7 +41,7 @@ for the k-th smallest value. This can be useful if you have previously called `a to find a smaller or larger rank (that will have led to partial sorting of `index`, thus implying some constraints on the location). -#### Licensing +### Licensing The Fortran Standard Library is distributed under the MIT License. It is worth noting that `select` and `arg_select` @@ -49,28 +49,26 @@ were derived using some code from quickSelect in the Coretran library, by Leon F https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here to be released under stdlib's MIT license. -### Specifications of the `stdlib_selection` procedures +## `select` - find the k-th smallest value in an input array -#### `select` - find the k-th smallest value in an input array - -##### Status +### Status Experimental -##### Description +### Description Returns the k-th smallest value of `array(:)`, and also partially sorts `array(:)` such that `all(array(1:k) <= array(k))` and `all(array(k) <= array((k+1):size(array)))` -##### Syntax +### Syntax `call [[stdlib_selection(module):select(interface)]]( array, k, kth_smallest [, left, right ] )` -##### Class +### Class Generic subroutine. -##### Arguments +### Arguments `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, @@ -95,16 +93,13 @@ in `array(1:right)`. If `right` is not present, the default is `size(array)`. Th to `select` are made, because the partial sorting of `array` implies constraints on where we need to search. -##### Notes +### Notes Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of this document shows that is the case. -On return the elements of `array` will be partially sorted such that: -`all( array(1:k-1) <= array(k) )` and `all(array(k) <= array(k+1:size(array)))`. - -##### Example +### Example ```fortran program demo_select @@ -136,27 +131,27 @@ On return the elements of `array` will be partially sorted such that: end program demo_select ``` -#### `arg_select` - find the kth smallest value in an input array +## `arg_select` - find the kth smallest value in an input array -##### Status +### Status Experimental -##### Description +### Description Returns the index of the k-th smallest value of `array(:)`, and also partially sorts the index-array `indx(:)` such that `all(array(indx(1:k)) <= array(indx(k)))` and `all(array(indx(k)) <= array(indx((k+1):size(array))))` -##### Syntax +### Syntax `call [[stdlib_selection(module):arg_select(interface)]]( array, indx, k, kth_smallest [, left, right ] )` -##### Class +### Class Generic subroutine. -##### Arguments +### Arguments `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, @@ -187,19 +182,21 @@ in `array(indx(1:right))`. If `right` is not present, the default is `size(array to `arg_select` are made, because the reordering of `indx` implies constraints on where we need to search. -##### Notes +### Notes `arg_select` does not modify `array`, unlike `select`. -While it is essential that that `indx` contains the integers `1:size(array)` (in any -order), the code does not check for this. +While it is essential that that `indx` contains a permutation of the integers `1:size(array)`, +the code does not check for this. For example if `size(array) == 4`, then we could have +`indx = [4, 2, 1, 3]` or `indx = [1, 2, 3, 4]`, but not `indx = [2, 1, 2, 4]`. It is the user's +responsibility to avoid such errors. Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of these documents confirms that is the case. -##### Example +### Example ```fortran diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index a9d505cc4..45cf28979 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -21,6 +21,9 @@ private public select, arg_select interface select + !! version: experimental + !! ([Specification](..//page/specs/stdlib_selection.html#description)) + #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("select", 1, arraytype, arraykind, intkind) @@ -30,6 +33,8 @@ interface select end interface interface arg_select + !! version: experimental + !! ([Specification](..//page/specs/stdlib_selection.html#description_1)) #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("arg_select", 1, arraytype, arraykind, intkind) From 1730985782b9b3b79eb68ab96f09b1409872a02a Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 7 Oct 2021 19:16:05 +1100 Subject: [PATCH 31/51] consistent use of k-th smallest as suggested by jvdp1 --- doc/specs/stdlib_selection.md | 8 ++++---- src/stdlib_selection.fypp | 16 ++++++++-------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index eab827792..d2f9725d9 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -23,11 +23,11 @@ which implements selection algorithms. ## Overview of the module The module `stdlib_selection` defines two generic subroutines: -* `select` is used to find the kth-smallest entry of an array. The input +* `select` is used to find the k-th smallest entry of an array. The input array is also modified in-place, and on return will be partially sorted such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size(array)))`. The user can optionally specify `left` and `right` indices to constrain the search -for the kth-smallest value. This can be useful if you have previously called `select` +for the k-th smallest value. This can be useful if you have previously called `select` to find a smaller or larger rank (that will have led to partial sorting of `array`, thus implying some constraints on the location). @@ -131,7 +131,7 @@ end of this document shows that is the case. end program demo_select ``` -## `arg_select` - find the kth smallest value in an input array +## `arg_select` - find the k-th smallest value in an input array ### Status @@ -156,7 +156,7 @@ Generic subroutine. `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, `real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(in)` argument. On input it is -the array in which we search for the kth smallest entry. +the array in which we search for the k-th smallest entry. `indx`: shall be a rank one array with the same size as `array`, containing all integers from `1:size(array)` in any order. It is of any of the types: diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 45cf28979..d9e8bf63a 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -55,18 +55,18 @@ contains !! quickSelect by Leon Foks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(inout) :: a(:) - !! Array in which we seek the kth-smallest entry. + !! Array in which we seek the k-th smallest entry. !! On output it will be partially sorted such that !! `all(a(1:(k-1)) <= a(k)) .and. all(a(k) <= a((k+1):size(a)))`. ${inttype}$, intent(in) :: k - !! We want the kth smallest entry. E.G. `k=1` leads to + !! We want the k-th smallest entry. E.G. `k=1` leads to !! `kth_smallest=min(a)`, and `k=size(a)` leads to !! `kth_smallest=max(a)` ${arraytype}$, intent(out) :: kth_smallest - !! On output contains the kth-smallest value of `a(:)` + !! On output contains the k-th smallest value of `a(:)` ${inttype}$, intent(in), optional :: left, right !! If we know that: - !! the kth-smallest entry of `a` is in `a(left:right)` + !! the k-th smallest entry of `a` is in `a(left:right)` !! and also that: !! `maxval(a(1:(left-1))) <= minval(a(left:right))` !! and: @@ -163,7 +163,7 @@ contains !! quickSelect by Leon Foks, https://github.com/leonfoks/coretran !! ${arraytype}$, intent(in) :: a(:) - !! Array in which we seek the kth-smallest entry. + !! Array in which we seek the k-th smallest entry. ${inttype}$, intent(inout) :: indx(:) !! Array of indices into `a(:)`. Must contain each integer !! from `1:size(a)` exactly once. On output it will be partially @@ -171,14 +171,14 @@ contains !! `all( a(indx(1:(k-1)))) <= a(indx(k)) ) .AND. !! all( a(indx(k)) <= a(indx( (k+1):size(a) )) )`. ${inttype}$, intent(in) :: k - !! We want index of the kth smallest entry. E.G. `k=1` leads to + !! We want index of the k-th smallest entry. E.G. `k=1` leads to !! `a(kth_smallest) = min(a)`, and `k=size(a)` leads to !! `a(kth_smallest) = max(a)` ${inttype}$, intent(out) :: kth_smallest - !! On output contains the index with the kth-smallest value of `a(:)` + !! On output contains the index with the k-th smallest value of `a(:)` ${inttype}$, intent(in), optional :: left, right !! If we know that: - !! the kth-smallest entry of `a` is in `a(indx(left:right))` + !! the k-th smallest entry of `a` is in `a(indx(left:right))` !! and also that: !! `maxval(a(indx(1:(left-1)))) <= minval(a(indx(left:right)))` !! and: From 0be350503cd03747e9b8c2671a53a8f26aa31d95 Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 7 Oct 2021 19:23:20 +1100 Subject: [PATCH 32/51] update specs links and title --- doc/specs/stdlib_selection.md | 2 +- src/stdlib_selection.fypp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index d2f9725d9..9f642562b 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -131,7 +131,7 @@ end of this document shows that is the case. end program demo_select ``` -## `arg_select` - find the k-th smallest value in an input array +## `arg_select` - find the index of the k-th smallest value in an input array ### Status diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index d9e8bf63a..104889531 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -22,7 +22,7 @@ public select, arg_select interface select !! version: experimental - !! ([Specification](..//page/specs/stdlib_selection.html#description)) + !! ([Specification](..//page/specs/stdlib_selection.html#select-find-the-k-th-smallest-value-in-an-input-array)) #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES @@ -34,7 +34,7 @@ end interface interface arg_select !! version: experimental - !! ([Specification](..//page/specs/stdlib_selection.html#description_1)) + !! ([Specification](..//page/specs/stdlib_selection.html#arg_select-find-the-index-of-the-k-th-smallest-value-in-an-input-array)) #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("arg_select", 1, arraytype, arraykind, intkind) From a6ecffaee35c9e6bbce1d514cd47a5bcb2bda8d8 Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 7 Oct 2021 19:27:42 +1100 Subject: [PATCH 33/51] change pth-smallest to p-th smallest, as suggested by jvdp1 --- src/tests/selection/test_selection.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index 909ed7901..bbdf240a7 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -114,7 +114,7 @@ contains ! any_failed=.FALSE. - ! Search for the pth-smallest rank, for all these p + ! Search for the p-th smallest rank, for all these p ! (avoid end-points to enable constrained search tests) do p = 3, Nr-2 @@ -290,7 +290,7 @@ contains ! any_failed=.FALSE. - ! Search for the pth-smallest, for all these p (avoid end-points to + ! Search for the p-th smallest, for all these p (avoid end-points to ! enable additional tests using "left", "right" arguments) do p = 3, Nr-2 From 4ab5fcfc99d465f8493a46b98af98b2224a69ae6 Mon Sep 17 00:00:00 2001 From: gareth-nx <82561769+gareth-nx@users.noreply.github.com> Date: Fri, 15 Oct 2021 19:14:26 +1100 Subject: [PATCH 34/51] Apply suggestions from code review by jvdp1 Co-authored-by: Jeremie Vandenplas --- src/stdlib_selection.fypp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 104889531..30bf0975a 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -107,13 +107,13 @@ contains end do contains - subroutine swap(a, b) + pure subroutine swap(a, b) ${arraytype}$, intent(inout) :: a, b ${arraytype}$ :: tmp tmp = a; a = b; b = tmp end subroutine - subroutine medianOf3(a, left, mid, right) + pure subroutine medianOf3(a, left, mid, right) ${arraytype}$, intent(inout) :: a(:) ${inttype}$, intent(in) :: left, mid, right if(a(right) < a(left)) call swap(a(right), a(left)) @@ -121,7 +121,7 @@ contains if(a(right) < a(mid) ) call swap(a(mid) , a(right)) end subroutine - subroutine partition(array,left,right,iPivot) + pure subroutine partition(array,left,right,iPivot) ${arraytype}$, intent(inout) :: array(:) ${inttype}$, intent(in) :: left, right ${inttype}$, intent(out) :: iPivot @@ -223,13 +223,13 @@ contains end do contains - subroutine swap(a, b) + pure subroutine swap(a, b) ${inttype}$, intent(inout) :: a, b ${inttype}$ :: tmp tmp = a; a = b; b = tmp end subroutine - subroutine arg_medianOf3(a, indx, left, mid, right) + pure subroutine arg_medianOf3(a, indx, left, mid, right) ${arraytype}$, intent(in) :: a(:) ${inttype}$, intent(inout) :: indx(:) ${inttype}$, intent(in) :: left, mid, right @@ -238,7 +238,7 @@ contains if(a(indx(right)) < a(indx(mid)) ) call swap(indx(mid) , indx(right)) end subroutine - subroutine arg_partition(array, indx, left,right,iPivot) + pure subroutine arg_partition(array, indx, left,right,iPivot) ${arraytype}$, intent(in) :: array(:) ${inttype}$, intent(inout) :: indx(:) ${inttype}$, intent(in) :: left, right From 73595b336bc7e7c32b6da3b1ee5b7fd1ff959bd4 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 10:29:47 +1100 Subject: [PATCH 35/51] make selection work with up-to-date stdlib --- src/stdlib_selection.fypp | 2 +- src/tests/selection/Makefile.manual | 4 +-- src/tests/selection/common.fypp | 37 ------------------------- src/tests/selection/test_selection.fypp | 2 +- 4 files changed, 4 insertions(+), 41 deletions(-) delete mode 100644 src/tests/selection/common.fypp diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 30bf0975a..046d4d33f 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -12,7 +12,7 @@ module stdlib_selection ! (https://github.com/fortran-lang/stdlib/pull/500#commitcomment-57418593) ! -use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp +use stdlib_kinds implicit none diff --git a/src/tests/selection/Makefile.manual b/src/tests/selection/Makefile.manual index 6054a359c..f04ea1c22 100644 --- a/src/tests/selection/Makefile.manual +++ b/src/tests/selection/Makefile.manual @@ -3,8 +3,8 @@ SRCFYPP =\ SRCGEN = $(SRCFYPP:.fypp=.f90) -$(SRCGEN): %.f90: %.fypp common.fypp - fypp $(FYPPFLAGS) $< $@ +$(SRCGEN): %.f90: %.fypp ../../common.fypp + fypp -I../.. $(FYPPFLAGS) $< $@ PROGS_SRC = $(SRCGEN) diff --git a/src/tests/selection/common.fypp b/src/tests/selection/common.fypp deleted file mode 100644 index 7c7c44c68..000000000 --- a/src/tests/selection/common.fypp +++ /dev/null @@ -1,37 +0,0 @@ -#:mute - -#! Real kinds to be considered during templating -#:set REAL_KINDS = ["sp", "dp", "qp"] - -#! Real types to be considered during templating -#:set REAL_TYPES = ["real({})".format(k) for k in REAL_KINDS] - -#! Collected (kind, type) tuples for real types -#:set REAL_KINDS_TYPES = list(zip(REAL_KINDS, REAL_TYPES)) - -#! Integer kinds to be considered during templating -#:set INT_KINDS = ["int8", "int16", "int32", "int64"] - -#! Integer types to be considered during templating -#:set INT_TYPES = ["integer({})".format(k) for k in INT_KINDS] - -#! Collected (kind, type) tuples for integer types -#:set INT_KINDS_TYPES = list(zip(INT_KINDS, INT_TYPES)) - -#! Generates a routine name from a generic name, rank, type and kind -#! -#! Args: -#! gname (str): Generic name -#! rank (integer): Rank if exist -#! type (str): Type of the input -#! kind (str): kind of inputs variable -#! suffix (str): other identifier (could be used for output type/kind) -#! -#! Returns: -#! A string with a new name -#! -#:def rname(gname, rank, type, kind, suffix='') - $:"{0}_{1}_{2}{3}_{2}{3}".format(gname, rank, type[0], kind) if suffix == '' else "{0}_{1}_{2}{3}_{4}".format(gname, rank, type[0], kind, suffix) -#:enddef - -#:endmute diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index bbdf240a7..2a20de79d 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -7,7 +7,7 @@ program test_selection use stdlib_error, only: check - use stdlib_kinds, only: int8, int16, int32, int64, sp, dp, qp + use stdlib_kinds use stdlib_selection, only: select, arg_select implicit none From 5df1b749a2a06999005005f49b267fb661c6f641 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 16:12:52 +1100 Subject: [PATCH 36/51] get the test-suite running with testdrive --- src/tests/selection/Makefile.manual | 3 +- src/tests/selection/test_selection.fypp | 169 +++++++++++++++++------- 2 files changed, 123 insertions(+), 49 deletions(-) diff --git a/src/tests/selection/Makefile.manual b/src/tests/selection/Makefile.manual index f04ea1c22..94a94037c 100644 --- a/src/tests/selection/Makefile.manual +++ b/src/tests/selection/Makefile.manual @@ -1,5 +1,4 @@ -SRCFYPP =\ - test_selection.fypp +SRCFYPP = test_selection.fypp SRCGEN = $(SRCFYPP:.fypp=.f90) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index 2a20de79d..e36f62da1 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -4,32 +4,48 @@ ! The index arrays are of all INT_KINDS_TYPES -program test_selection +module test_selection - use stdlib_error, only: check use stdlib_kinds use stdlib_selection, only: select, arg_select + use testdrive, only: new_unittest, unittest_type, error_type, check + implicit none - #:for arraykind, arraytype in ARRAY_KINDS_TYPES - #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("test_select", 1, arraytype, arraykind, intkind) - call ${name}$() - #:endfor - #:endfor + private + public :: collect_selection - #:for arraykind, arraytype in ARRAY_KINDS_TYPES - #:for intkind, inttype in INT_KINDS_TYPES - #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) - call ${name}$() - #:endfor - #:endfor contains + !> Collect all exported unit tests + subroutine collect_selection(testsuite) + !> Collection of tests + type(unittest_type), allocatable, intent(out) :: testsuite(:) + + testsuite = [ & + new_unittest("test_select_1_iint8_int8", test_select_1_iint8_int8) & + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_select", 1, arraytype, arraykind, intkind) + , new_unittest("${name}$", ${name}$) & + #:endfor + #:endfor + + #:for arraykind, arraytype in ARRAY_KINDS_TYPES + #:for intkind, inttype in INT_KINDS_TYPES + #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) + , new_unittest("${name}$", ${name}$) & + #:endfor + #:endfor + ] + + end subroutine collect_selection + #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("test_select", 1, arraytype, arraykind, intkind) - subroutine ${name}$() + subroutine ${name}$(error) + type(error_type), allocatable, intent(out) :: error ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) @@ -50,7 +66,8 @@ contains do i = 1, size(x, kind=ip) x_copy = x call select(x_copy, i, kth_smallest) - call check( (kth_smallest == i**2), " ${name}$: kth smallest entry should be i**2") + call check( error, (kth_smallest == i**2), " ${name}$: kth smallest entry should be i**2") + if(allocated(error)) return end do ! Check that it works when we specify "left" and know that the array @@ -58,7 +75,8 @@ contains x_copy = x do i = 1, size(x, kind=ip), 1 call select(x_copy, i, kth_smallest, left=i) - call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with left specified") + call check( error, (kth_smallest == i**2), " ${name}$: kth smallest entry with left specified") + if(allocated(error)) return end do ! Check that it works when we specify "right" and know that the array @@ -66,48 +84,60 @@ contains x_copy = x do i = size(x, kind=ip), 1, -1 call select(x_copy, i, kth_smallest, right=i) - call check( (kth_smallest == i**2), " ${name}$: kth smallest entry with right specified") + call check( error, (kth_smallest == i**2), " ${name}$: kth smallest entry with right specified") + if(allocated(error)) return end do ! Simple tests mat = one * [3, 2, 7, 4, 5, 1, 4, -1] mat_copy = mat call select(mat_copy, 1_ip, kth_smallest) - call check( kth_smallest == -1, " ${name}$: mat test 1") + call check(error, kth_smallest == -1, " ${name}$: mat test 1") + if(allocated(error)) return mat_copy = mat call select(mat_copy, 2_ip, kth_smallest) - call check( kth_smallest == 1, " ${name}$: mat test 2") + call check(error, kth_smallest == 1, " ${name}$: mat test 2") + if(allocated(error)) return mat_copy = mat call select(mat_copy, size(mat, kind=ip)+1_ip-4_ip, kth_smallest) - call check( kth_smallest == 4, " ${name}$: mat test 3") + call check(error, kth_smallest == 4, " ${name}$: mat test 3") + if(allocated(error)) return mat_copy = mat call select(mat_copy, 5_ip, kth_smallest) - call check( kth_smallest == 4, " ${name}$: mat test 4") + call check(error, kth_smallest == 4, " ${name}$: mat test 4") + if(allocated(error)) return mat_copy = mat call select(mat_copy, 6_ip, kth_smallest) - call check( kth_smallest == 4, " ${name}$: mat test 5") + call check(error, kth_smallest == 4, " ${name}$: mat test 5") + if(allocated(error)) return mat_copy = mat call select(mat_copy, 7_ip, kth_smallest) - call check( kth_smallest == 5, " ${name}$: mat test 6") + call check(error, kth_smallest == 5, " ${name}$: mat test 6") + if(allocated(error)) return ! Check it works for size(a) == 1 len1(1) = -1 * one call select(len1, 1_ip, kth_smallest) - call check(kth_smallest == -1, " ${name}$: array with size 1") + call check(error, kth_smallest == -1, " ${name}$: array with size 1") + if(allocated(error)) return ! Check it works for size(a) == 2 len2 = [-3, -5]*one call select(len2, 2_ip, kth_smallest) - call check(kth_smallest == -3, " ${name}$: array with size 2, test 1") + call check(error, kth_smallest == -3, " ${name}$: array with size 2, test 1") + if(allocated(error)) return len2 = [-3, -5]*one call select(len2, 1_ip, kth_smallest) - call check(kth_smallest == -5, " ${name}$: array with size 2, test 2") + call check(error, kth_smallest == -5, " ${name}$: array with size 2, test 2") + if(allocated(error)) return len2 = [-1, -1]*one call select(len2, 1_ip, kth_smallest) - call check(kth_smallest == -1, " ${name}$: array with size 2, test 3") + call check(error, kth_smallest == -1, " ${name}$: array with size 2, test 3") + if(allocated(error)) return len2 = [-1, -1]*one call select(len2, 2_ip, kth_smallest) - call check(kth_smallest == -1, " ${name}$: array with size 2, test 4") + call check(error, kth_smallest == -1, " ${name}$: array with size 2, test 4") + if(allocated(error)) return ! ! Test using random data @@ -174,7 +204,8 @@ contains end do end do - call check( (.not. any_failed), " ${name}$: random number test failed ") + call check(error, (.not. any_failed), " ${name}$: random number test failed ") + if(allocated(error)) return end subroutine @@ -185,7 +216,8 @@ contains #:for arraykind, arraytype in ARRAY_KINDS_TYPES #:for intkind, inttype in INT_KINDS_TYPES #:set name = rname("test_arg_select", 1, arraytype, arraykind, intkind) - subroutine ${name}$ + subroutine ${name}$(error) + type(error_type), allocatable, intent(out) :: error ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) @@ -210,7 +242,8 @@ contains do i = 1, size(x, kind=ip) indx_copy = indx call arg_select(x, indx, i, kth_smallest) - call check(x(kth_smallest) == i**2, " ${name}$: kth smallest entry should be i**2") + call check(error, x(kth_smallest) == i**2, " ${name}$: kth smallest entry should be i**2") + if(allocated(error)) return end do ! Check that it works when we specify "left" and know that the index @@ -218,7 +251,8 @@ contains indx_copy = indx do i = 1, size(x, kind=ip), 1 call arg_select(x, indx_copy, i, kth_smallest, left=i) - call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with left specified") + call check(error, (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with left specified") + if(allocated(error)) return end do ! Check that it works when we specify "right" and know that the index @@ -226,7 +260,8 @@ contains indx_copy = indx do i = size(x, kind=ip), 1, -1 call arg_select(x, indx_copy, i, kth_smallest, right=i) - call check( (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with right specified") + call check(error, (x(kth_smallest) == i**2), " ${name}$: kth smallest entry with right specified") + if(allocated(error)) return end do ! Simple tests @@ -235,55 +270,66 @@ contains indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 1_ip, kth_smallest) - call check( mat(kth_smallest) == -1, " ${name}$: mat test 1") + call check(error, mat(kth_smallest) == -1, " ${name}$: mat test 1") + if(allocated(error)) return indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 2_ip, kth_smallest) - call check( mat(kth_smallest) == 1, " ${name}$: mat test 2") + call check(error, mat(kth_smallest) == 1, " ${name}$: mat test 2") + if(allocated(error)) return indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, size(mat, kind=ip)+1_ip-4_ip, & kth_smallest) - call check( mat(kth_smallest) == 4, " ${name}$: mat test 3") + call check(error, mat(kth_smallest) == 4, " ${name}$: mat test 3") + if(allocated(error)) return indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 5_ip, kth_smallest) - call check( mat(kth_smallest) == 4, " ${name}$: mat test 4") + call check(error, mat(kth_smallest) == 4, " ${name}$: mat test 4") + if(allocated(error)) return indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 6_ip, kth_smallest) - call check( mat(kth_smallest) == 4, " ${name}$: mat test 5") + call check(error, mat(kth_smallest) == 4, " ${name}$: mat test 5") + if(allocated(error)) return indx_mat_copy = indx_mat call arg_select(mat, indx_mat_copy, 7_ip, kth_smallest) - call check( mat(kth_smallest) == 5, " ${name}$: mat test 6") + call check(error, mat(kth_smallest) == 5, " ${name}$: mat test 6") + if(allocated(error)) return ! Check it works for size(a) == 1 len1(1) = -1 * one indx_len1(1) = 1 call arg_select(len1, indx_len1, 1_ip, kth_smallest) - call check(len1(kth_smallest) == -1, " ${name}$: array with size 1") + call check(error, len1(kth_smallest) == -1, " ${name}$: array with size 1") + if(allocated(error)) return ! Check it works for size(a) == 2 len2 = [-3, -5] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 2_ip, kth_smallest) - call check(len2(kth_smallest) == -3, " ${name}$: array with size 2, test 1") + call check(error, len2(kth_smallest) == -3, " ${name}$: array with size 2, test 1") + if(allocated(error)) return len2 = [-3, -5] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 1_ip, kth_smallest) - call check(len2(kth_smallest) == -5, " ${name}$: array with size 2, test 2") + call check(error, len2(kth_smallest) == -5, " ${name}$: array with size 2, test 2") + if(allocated(error)) return len2 = [-1, -1] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 1_ip, kth_smallest) - call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 3") + call check(error, len2(kth_smallest) == -1, " ${name}$: array with size 2, test 3") + if(allocated(error)) return len2 = [-1, -1] * one indx_len2 = [1_ip, 2_ip] call arg_select(len2, indx_len2, 2_ip, kth_smallest) - call check(len2(kth_smallest) == -1, " ${name}$: array with size 2, test 4") + call check(error, len2(kth_smallest) == -1, " ${name}$: array with size 2, test 4") + if(allocated(error)) return ! ! Test using random data @@ -360,10 +406,39 @@ contains end do end do - call check( (.not. any_failed), " ${name}$: random number test failed ") + call check(error, (.not. any_failed), " ${name}$: random number test failed ") + if(allocated(error)) return end subroutine #:endfor #:endfor -end program +end module + +program tester + use, intrinsic :: iso_fortran_env, only: compiler_version, error_unit + use testdrive, only: new_testsuite, run_testsuite, testsuite_type + use test_selection, only: collect_selection + + implicit none + integer :: stat, is + type(testsuite_type), allocatable :: testsuites(:) + character(len=*), parameter :: fmt = '("#", *(1x, a))' + + stat = 0 + + testsuites = [ & + new_testsuite("selection", collect_selection) & + ] + + do is = 1, size(testsuites) + write(error_unit, fmt) "Testing:", testsuites(is)%name + call run_testsuite(testsuites(is)%collect, error_unit, stat) + end do + + if (stat > 0) then + write(error_unit, '(i0, 1x, a)') stat, "test(s) failed!" + error stop + end if + +end program tester From f4a4dacb4ae2d17bd3787c94f59d1b4185ac4f0c Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 16:25:49 +1100 Subject: [PATCH 37/51] remove line causing bug in cmake builds --- src/tests/selection/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/tests/selection/CMakeLists.txt b/src/tests/selection/CMakeLists.txt index 6b56c6d10..da0f56958 100644 --- a/src/tests/selection/CMakeLists.txt +++ b/src/tests/selection/CMakeLists.txt @@ -5,8 +5,6 @@ set(fppFiles test_selection.fypp ) -set(fyppFlags) - fypp_f90("${fyppFlags}" "${fppFiles}" outFiles) ADDTEST(selection) From 854fe4c53cf73aac9805d3b1ac4d021a68b23955 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 16:47:40 +1100 Subject: [PATCH 38/51] make dependency on stdlib_kinds clear --- src/Makefile.manual | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Makefile.manual b/src/Makefile.manual index 9ec4527ae..da8ef6045 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -105,6 +105,8 @@ stdlib_quadrature_trapz.o: \ stdlib_quadrature.o \ stdlib_error.o \ stdlib_kinds.o +stdlib_selection.o: \ + stdlib_kinds.o stdlib_sorting.o: \ stdlib_kinds.o \ stdlib_string_type.o From 8a803e999e08fec8c91d5311abc5795718b5d7bf Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 16:49:11 +1100 Subject: [PATCH 39/51] spaces not tabs --- src/Makefile.manual | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Makefile.manual b/src/Makefile.manual index da8ef6045..d03a9072c 100644 --- a/src/Makefile.manual +++ b/src/Makefile.manual @@ -106,7 +106,7 @@ stdlib_quadrature_trapz.o: \ stdlib_error.o \ stdlib_kinds.o stdlib_selection.o: \ - stdlib_kinds.o + stdlib_kinds.o stdlib_sorting.o: \ stdlib_kinds.o \ stdlib_string_type.o From 573dbfadadf8f7c4ffb2070e8e6579058628ff13 Mon Sep 17 00:00:00 2001 From: gareth Date: Sat, 20 Nov 2021 17:28:08 +1100 Subject: [PATCH 40/51] avoid a single logical treating many tests --- src/tests/selection/test_selection.fypp | 48 ++++++++++--------------- 1 file changed, 19 insertions(+), 29 deletions(-) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index e36f62da1..5440d8ab0 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -54,7 +54,7 @@ contains kth_smallest, random_vals(Nr), one = 1 ${inttype}$ :: i, p, up_rank, down_rank, mid_rank real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases - logical :: test1, test2, test3, any_failed + logical :: test1, test2, test3 integer, parameter :: ip = ${intkind}$ ! x contains the numbers 1**2, 2**2, .... 10**2, with mixed-up order @@ -142,8 +142,6 @@ contains ! ! Test using random data ! - any_failed=.FALSE. - ! Search for the p-th smallest rank, for all these p ! (avoid end-points to enable constrained search tests) do p = 3, Nr-2 @@ -161,8 +159,8 @@ contains test2 = all(random_vals(1:(p-1)) <= random_vals(p)) test3 = all(random_vals(p) <= & random_vals((p+1):size(random_vals, kind=ip))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data regular select") + if(allocated(error)) return ! Constrained search above 'p', providing 'left' up_rank = p + (Nr-p)/2_ip ! Deliberate integer division @@ -172,8 +170,8 @@ contains test2 = all(random_vals(1:(up_rank-1)) <= random_vals(up_rank)) test3 = all(random_vals(up_rank) <= & random_vals((up_rank+1):size(random_vals, kind=ip))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data left-constrained select") + if(allocated(error)) return ! Constrained search below p, providing 'right' down_rank = p - (p/2_ip) @@ -184,8 +182,8 @@ contains random_vals(down_rank)) test3 = all(random_vals(down_rank) <= & random_vals((down_rank+1):size(random_vals, kind=ip))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data right-constrained select") + if(allocated(error)) return ! Constrained search between up-ind and down-ind, proving left ! and right. Make 'mid_rank' either above or below p @@ -198,16 +196,12 @@ contains random_vals(mid_rank)) test3 = all(random_vals(mid_rank) <= & random_vals((mid_rank+1):size(random_vals, kind=ip))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data left-right-constrained select") + if(allocated(error)) return end do end do - call check(error, (.not. any_failed), " ${name}$: random number test failed ") - if(allocated(error)) return - - end subroutine #:endfor #:endfor @@ -229,7 +223,7 @@ contains indx_len1(1), indx_len2(2), indx_r(Nr) real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases integer(ip) :: i, j, p, up_rank, down_rank, mid_rank, kth_smallest - logical :: test1, test2, test3, any_failed + logical :: test1, test2, test3 ! Make x contain 1**2, 2**2, .... 10**2, but mix up the order x = (/( i**2, i=1, size(x, kind=ip) )/) @@ -334,8 +328,6 @@ contains ! ! Test using random data ! - any_failed=.FALSE. - ! Search for the p-th smallest, for all these p (avoid end-points to ! enable additional tests using "left", "right" arguments) do p = 3, Nr-2 @@ -357,8 +349,8 @@ contains random_vals(indx_r(p))) test3 = all(random_vals(indx_r(p)) <= & random_vals(indx_r((p+1):size(random_vals, kind=ip)))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data regular arg_select") + if(allocated(error)) return ! Constrained search for a rank above 'p', providing 'left' up_rank = p + (Nr-p)/2_ip ! Deliberate integer division @@ -371,8 +363,9 @@ contains random_vals(indx_r(up_rank))) test3 = all(random_vals(indx_r(up_rank)) <= & random_vals(indx_r((up_rank+1):size(random_vals, kind=ip)))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data left-constrained arg_select") + if(allocated(error)) return + ! Constrained search for a rank below p, providing 'right' down_rank = p - (p/2_ip) @@ -385,8 +378,8 @@ contains random_vals(indx_r(down_rank))) test3 = all(random_vals(indx_r(down_rank)) <= & random_vals(indx_r((down_rank+1):size(random_vals, kind=ip)))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data right-constrained arg_select") + if(allocated(error)) return ! Constrained search for a rank between up-ind and down-ind, ! proving left and right. 'mid_rank' is either above or below p @@ -400,15 +393,12 @@ contains random_vals(indx_r(mid_rank))) test3 = all(random_vals(indx_r(mid_rank)) <= & random_vals(indx_r((mid_rank+1):size(random_vals, kind=ip)))) - if( (.not. test1) .or. (.not. test2) .or. (.not. test3) ) & - any_failed = .TRUE. + call check(error, (test1 .and. test2 .and. test3), "${name}$: random data left-right-constrained arg_select") + if(allocated(error)) return end do end do - call check(error, (.not. any_failed), " ${name}$: random number test failed ") - if(allocated(error)) return - end subroutine #:endfor #:endfor From c32ce1767a3c54cc42cd6396ccb028c91c4729fd Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 21 Nov 2021 10:01:54 +1100 Subject: [PATCH 41/51] add in the missing intent of argument k in specs. Also add real(xdp) to list of supported kinds --- doc/specs/stdlib_selection.md | 46 +++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 9f642562b..2fd70a9c5 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -72,26 +72,29 @@ Generic subroutine. `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, -`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(inout)` argument. +`real(sp)`, `real(dp)`, `real(xdp)`, `real(qp)`. It is an `intent(inout)` argument. `k`: shall be a scalar with any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`. It is an `intent(in)` argument. We search for the `k`-th smallest entry of `array(:)`. -`kth_smallest`: shall be a scalar with the same type as `array`. On return it contains -the k-th smallest entry of `array(:)`. +`kth_smallest`: shall be a scalar with the same type as `array`. It is an +`intent(out)` argument. On return it contains the k-th smallest entry of +`array(:)`. -`left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` -argument. If specified then we assume the k-th smallest value is definitely contained -in `array(left:size(array))`. If `left` is not present, the default is 1. This is typically useful if multiple calls -to `select` are made, because the partial sorting of `array` implies constraints on where -we need to search. +`left` (optional): shall be a scalar with the same type as `k`. It is an +`intent(in)` argument. If specified then we assume the k-th smallest value is +definitely contained in `array(left:size(array))`. If `left` is not present, +the default is 1. This is typically useful if multiple calls to `select` are +made, because the partial sorting of `array` implies constraints on where we +need to search. -`right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` -argument. If specified then we assume the k-th smallest value is definitely contained -in `array(1:right)`. If `right` is not present, the default is `size(array)`. This is typically useful if multiple calls -to `select` are made, because the partial sorting of `array` implies constraints on where -we need to search. +`right` (optional): shall be a scalar with the same type as `k`. It is an +`intent(in)` argument. If specified then we assume the k-th smallest value is +definitely contained in `array(1:right)`. If `right` is not present, the +default is `size(array)`. This is typically useful if multiple calls to +`select` are made, because the partial sorting of `array` implies constraints +on where we need to search. ### Notes @@ -155,7 +158,7 @@ Generic subroutine. `array` : shall be a rank one array of any of the types: `integer(int8)`, `integer(int16)`, `integer(int32)`, `integer(int64)`, -`real(sp)`, `real(dp)`, `real(qp)`. It is an `intent(in)` argument. On input it is +`real(sp)`, `real(dp)`, `real(xdp), `real(qp)`. It is an `intent(in)` argument. On input it is the array in which we search for the k-th smallest entry. `indx`: shall be a rank one array with the same size as `array`, containing all integers @@ -172,15 +175,16 @@ and on return it contains the index of the k-th smallest entry of `array(:)`. `left` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(indx(left:size(array)))`. If `left` is not present, the default is 1. This is typically useful if multiple calls -to `arg_select` are made, because the partial sorting of `indx` implies constraints on where -we need to search. +in `array(indx(left:size(array)))`. If `left` is not present, the default is 1. +This is typically useful if multiple calls to `arg_select` are made, because +the partial sorting of `indx` implies constraints on where we need to search. `right` (optional): shall be a scalar with the same type as `k`. It is an `intent(in)` argument. If specified then we assume the k-th smallest value is definitely contained -in `array(indx(1:right))`. If `right` is not present, the default is `size(array)`. This is typically useful if multiple calls -to `arg_select` are made, because the reordering of `indx` implies constraints on -where we need to search. +in `array(indx(1:right))`. If `right` is not present, the default is +`size(array)`. This is typically useful if multiple calls to `arg_select` are +made, because the reordering of `indx` implies constraints on where we need to +search. ### Notes @@ -189,7 +193,7 @@ where we need to search. While it is essential that that `indx` contains a permutation of the integers `1:size(array)`, the code does not check for this. For example if `size(array) == 4`, then we could have `indx = [4, 2, 1, 3]` or `indx = [1, 2, 3, 4]`, but not `indx = [2, 1, 2, 4]`. It is the user's -responsibility to avoid such errors. +responsibility to avoid such errors. Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of From b6a40e8715bc6469e53383979ba509b047eaad47 Mon Sep 17 00:00:00 2001 From: gareth Date: Wed, 24 Nov 2021 20:29:48 +1100 Subject: [PATCH 42/51] remove indent in specs code examples --- doc/specs/stdlib_selection.md | 255 +++++++++++++++++----------------- 1 file changed, 127 insertions(+), 128 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 2fd70a9c5..24dab1f5b 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -105,33 +105,33 @@ end of this document shows that is the case. ### Example ```fortran - program demo_select - use stdlib_selection, only: select - implicit none +program demo_select + use stdlib_selection, only: select + implicit none - real, allocatable :: array(:) - real :: kth_smallest - integer :: k, left, right + real, allocatable :: array(:) + real :: kth_smallest + integer :: k, left, right - array = [3., 2., 7., 4., 5., 1., 4., -1.] + array = [3., 2., 7., 4., 5., 1., 4., -1.] - k = 2 - call select(array, k, kth_smallest) - print*, kth_smallest ! print 1.0 + k = 2 + call select(array, k, kth_smallest) + print*, kth_smallest ! print 1.0 - k = 7 - ! Due to the previous call to select, we know for sure this is in an - ! index >= 2 - call select(array, k, kth_smallest, left=2) - print*, kth_smallest ! print 5.0 + k = 7 + ! Due to the previous call to select, we know for sure this is in an + ! index >= 2 + call select(array, k, kth_smallest, left=2) + print*, kth_smallest ! print 5.0 - k = 6 - ! Due to the previous two calls to select, we know for sure this is in - ! an index >= 2 and <= 7 - call select(array, k, kth_smallest, left=2, right=7) - print*, kth_smallest ! print 4.0 + k = 6 + ! Due to the previous two calls to select, we know for sure this is in + ! an index >= 2 and <= 7 + call select(array, k, kth_smallest, left=2, right=7) + print*, kth_smallest ! print 4.0 - end program demo_select +end program demo_select ``` ## `arg_select` - find the index of the k-th smallest value in an input array @@ -204,35 +204,35 @@ these documents confirms that is the case. ```fortran - program demo_arg_select - use stdlib_selection, only: arg_select - implicit none - - real, allocatable :: array(:) - integer, allocatable :: indx(:) - integer :: kth_smallest - integer :: k, left, right - - array = [3., 2., 7., 4., 5., 1., 4., -1.] - indx = [( k, k = 1, size(array) )] - - k = 2 - call arg_select(array, indx, k, kth_smallest) - print*, array(kth_smallest) ! print 1.0 - - k = 7 - ! Due to the previous call to arg_select, we know for sure this is in an - ! index >= 2 - call arg_select(array, indx, k, kth_smallest, left=2) - print*, array(kth_smallest) ! print 5.0 - - k = 6 - ! Due to the previous two calls to arg_select, we know for sure this is in - ! an index >= 2 and <= 7 - call arg_select(array, indx, k, kth_smallest, left=2, right=7) - print*, array(kth_smallest) ! print 4.0 - - end program demo_arg_select +program demo_arg_select + use stdlib_selection, only: arg_select + implicit none + + real, allocatable :: array(:) + integer, allocatable :: indx(:) + integer :: kth_smallest + integer :: k, left, right + + array = [3., 2., 7., 4., 5., 1., 4., -1.] + indx = [( k, k = 1, size(array) )] + + k = 2 + call arg_select(array, indx, k, kth_smallest) + print*, array(kth_smallest) ! print 1.0 + + k = 7 + ! Due to the previous call to arg_select, we know for sure this is in an + ! index >= 2 + call arg_select(array, indx, k, kth_smallest, left=2) + print*, array(kth_smallest) ! print 5.0 + + k = 6 + ! Due to the previous two calls to arg_select, we know for sure this is in + ! an index >= 2 and <= 7 + call arg_select(array, indx, k, kth_smallest, left=2, right=7) + print*, array(kth_smallest) ! print 4.0 + +end program demo_arg_select ``` ## Comparison with using `sort` @@ -243,84 +243,83 @@ should see a speed improvement with the selection routines which grows like LOG(size(`array`)). ```fortran - program selection_vs_sort - use stdlib_kinds, only: dp, sp, int64 - use stdlib_selection, only: select, arg_select - use stdlib_sorting, only: sort - implicit none - - call compare_select_sort_for_median(1) - call compare_select_sort_for_median(11) - call compare_select_sort_for_median(101) - call compare_select_sort_for_median(1001) - call compare_select_sort_for_median(10001) - call compare_select_sort_for_median(100001) - - - contains - subroutine compare_select_sort_for_median(N) - integer, intent(in) :: N - - integer :: i, k, result_arg_select, indx(N), indx_local(N) - real :: random_vals(N), local_random_vals(N) - integer, parameter :: test_reps = 100 - integer(int64) :: t0, t1 - real :: result_sort, result_select - integer(int64) :: time_sort, time_select, time_arg_select - logical :: select_test_passed, arg_select_test_passed - - ! Ensure N is odd - if(mod(N, 2) /= 1) stop - - time_sort = 0 - time_select = 0 - time_arg_select = 0 - - select_test_passed = .true. - arg_select_test_passed = .true. - - indx = (/( i, i = 1, N) /) - - k = (N+1)/2 ! Deliberate integer division - - do i = 1, test_reps - call random_number(random_vals) - - ! Compute the median with sorting - local_random_vals = random_vals - call system_clock(t0) - call sort(local_random_vals) - result_sort = local_random_vals(k) - call system_clock(t1) - time_sort = time_sort + (t1 - t0) - - ! Compute the median with selection, assuming N is odd - local_random_vals = random_vals - call system_clock(t0) - call select(local_random_vals, k, result_select) - call system_clock(t1) - time_select = time_select + (t1 - t0) - - ! Compute the median with arg_select, assuming N is odd - local_random_vals = random_vals - indx_local = indx - call system_clock(t0) - call arg_select(local_random_vals, indx_local, k, result_arg_select) - call system_clock(t1) - time_arg_select = time_arg_select + (t1 - t0) - - if(result_select /= result_sort) select_test_passed = .FALSE. - if(local_random_vals(result_arg_select) /= result_sort) arg_select_test_passed = .FALSE. - end do - - print*, "select ; N=", N, '; ', merge('PASS', 'FAIL', select_test_passed), & - '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_select) - print*, "arg_select; N=", N, '; ', merge('PASS', 'FAIL', arg_select_test_passed), & - '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_arg_select) - - end subroutine - - end program +program selection_vs_sort + use stdlib_kinds, only: dp, sp, int64 + use stdlib_selection, only: select, arg_select + use stdlib_sorting, only: sort + implicit none + + call compare_select_sort_for_median(1) + call compare_select_sort_for_median(11) + call compare_select_sort_for_median(101) + call compare_select_sort_for_median(1001) + call compare_select_sort_for_median(10001) + call compare_select_sort_for_median(100001) + + contains + subroutine compare_select_sort_for_median(N) + integer, intent(in) :: N + + integer :: i, k, result_arg_select, indx(N), indx_local(N) + real :: random_vals(N), local_random_vals(N) + integer, parameter :: test_reps = 100 + integer(int64) :: t0, t1 + real :: result_sort, result_select + integer(int64) :: time_sort, time_select, time_arg_select + logical :: select_test_passed, arg_select_test_passed + + ! Ensure N is odd + if(mod(N, 2) /= 1) stop + + time_sort = 0 + time_select = 0 + time_arg_select = 0 + + select_test_passed = .true. + arg_select_test_passed = .true. + + indx = (/( i, i = 1, N) /) + + k = (N+1)/2 ! Deliberate integer division + + do i = 1, test_reps + call random_number(random_vals) + + ! Compute the median with sorting + local_random_vals = random_vals + call system_clock(t0) + call sort(local_random_vals) + result_sort = local_random_vals(k) + call system_clock(t1) + time_sort = time_sort + (t1 - t0) + + ! Compute the median with selection, assuming N is odd + local_random_vals = random_vals + call system_clock(t0) + call select(local_random_vals, k, result_select) + call system_clock(t1) + time_select = time_select + (t1 - t0) + + ! Compute the median with arg_select, assuming N is odd + local_random_vals = random_vals + indx_local = indx + call system_clock(t0) + call arg_select(local_random_vals, indx_local, k, result_arg_select) + call system_clock(t1) + time_arg_select = time_arg_select + (t1 - t0) + + if(result_select /= result_sort) select_test_passed = .FALSE. + if(local_random_vals(result_arg_select) /= result_sort) arg_select_test_passed = .FALSE. + end do + + print*, "select ; N=", N, '; ', merge('PASS', 'FAIL', select_test_passed), & + '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_select) + print*, "arg_select; N=", N, '; ', merge('PASS', 'FAIL', arg_select_test_passed), & + '; Relative-speedup-vs-sort:', (1.0*time_sort)/(1.0*time_arg_select) + + end subroutine + +end program ``` The results seem consistent with expectations when the `array` is large; the program prints: From 78fa5a6903ba0b348b791899128d4c755d8fb80d Mon Sep 17 00:00:00 2001 From: gareth Date: Wed, 24 Nov 2021 20:32:21 +1100 Subject: [PATCH 43/51] Remove Licencing section, and put acknowledgement of Leon Foks in the Notes section --- doc/specs/stdlib_selection.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 24dab1f5b..a0a6983f1 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -41,13 +41,6 @@ for the k-th smallest value. This can be useful if you have previously called `a to find a smaller or larger rank (that will have led to partial sorting of `index`, thus implying some constraints on the location). -### Licensing - -The Fortran Standard Library is distributed under the MIT -License. It is worth noting that `select` and `arg_select` -were derived using some code from quickSelect in the Coretran library, by Leon Foks, -https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here -to be released under stdlib's MIT license. ## `select` - find the k-th smallest value in an input array @@ -102,6 +95,10 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of this document shows that is the case. +`select` was derived using some code from quickSelect in the Coretran library, by Leon Foks, +https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here +to be released under stdlib's MIT license. + ### Example ```fortran @@ -199,6 +196,9 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of these documents confirms that is the case. +`arg_select` was derived using some code from quickSelect in the Coretran library, by Leon Foks, +https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here +to be released under stdlib's MIT license. ### Example From 069a1119accb92f941311919778959664bf14ca6 Mon Sep 17 00:00:00 2001 From: gareth Date: Wed, 24 Nov 2021 20:33:42 +1100 Subject: [PATCH 44/51] slight rewording --- doc/specs/stdlib_selection.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index a0a6983f1..e321dac51 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -95,9 +95,9 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of this document shows that is the case. -`select` was derived using some code from quickSelect in the Coretran library, by Leon Foks, -https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here -to be released under stdlib's MIT license. +`select` was derived from code in the Coretran library by Leon Foks, +https://github.com/leonfoks/coretran. Leon Foks has given permission for the +code here to be released under stdlib's MIT license. ### Example @@ -196,9 +196,9 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of these documents confirms that is the case. -`arg_select` was derived using some code from quickSelect in the Coretran library, by Leon Foks, -https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here -to be released under stdlib's MIT license. +`arg_select` was derived using code from the Coretran library by Leon Foks, +https://github.com/leonfoks/coretran. Leon Foks has given permission for the +code here to be released under stdlib's MIT license. ### Example From 57500c122bf35ea3278e975b5dd664e7fde0bd49 Mon Sep 17 00:00:00 2001 From: gareth-nx <82561769+gareth-nx@users.noreply.github.com> Date: Thu, 25 Nov 2021 17:31:04 +1100 Subject: [PATCH 45/51] Apply the more straightforward suggestions from code review Co-authored-by: Ivan Pribec --- doc/specs/stdlib_selection.md | 2 +- src/stdlib_selection.fypp | 7 ++++--- src/tests/selection/CMakeLists.txt | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index e321dac51..15cddd467 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -25,7 +25,7 @@ which implements selection algorithms. The module `stdlib_selection` defines two generic subroutines: * `select` is used to find the k-th smallest entry of an array. The input array is also modified in-place, and on return will be partially sorted -such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size(array)))`. +such that `all(array(1:k) <= array(k)))` and `all(array(k) <= array((k+1):size(array)))` is true. The user can optionally specify `left` and `right` indices to constrain the search for the k-th smallest value. This can be useful if you have previously called `select` to find a smaller or larger rank (that will have led to partial sorting of diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 046d4d33f..1f7bddafe 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -8,7 +8,7 @@ module stdlib_selection ! This code was modified from the "Coretran" implementation "quickSelect" by ! Leon Foks, https://github.com/leonfoks/coretran/tree/master/src/sorting ! -! Leon Foks gave permission to be release this code under stdlib's MIT license. +! Leon Foks gave permission to release this code under stdlib's MIT license. ! (https://github.com/fortran-lang/stdlib/pull/500#commitcomment-57418593) ! @@ -18,7 +18,7 @@ implicit none private -public select, arg_select +public :: select, arg_select interface select !! version: experimental @@ -57,7 +57,8 @@ contains ${arraytype}$, intent(inout) :: a(:) !! Array in which we seek the k-th smallest entry. !! On output it will be partially sorted such that - !! `all(a(1:(k-1)) <= a(k)) .and. all(a(k) <= a((k+1):size(a)))`. + !! `all(a(1:(k-1)) <= a(k)) .and. all(a(k) <= a((k+1):size(a)))` + !! is true. ${inttype}$, intent(in) :: k !! We want the k-th smallest entry. E.G. `k=1` leads to !! `kth_smallest=min(a)`, and `k=size(a)` leads to diff --git a/src/tests/selection/CMakeLists.txt b/src/tests/selection/CMakeLists.txt index da0f56958..f3bb19811 100644 --- a/src/tests/selection/CMakeLists.txt +++ b/src/tests/selection/CMakeLists.txt @@ -1,7 +1,8 @@ ### Pre-process: .fpp -> .f90 via Fypp # Create a list of the files to be preprocessed -set(fppFiles +set( + fppFiles test_selection.fypp ) From 44d98043be9c50143956fcdce071303fe6ee059d Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 25 Nov 2021 17:54:36 +1100 Subject: [PATCH 46/51] avoid overflow in computing mid index --- src/stdlib_selection.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 1f7bddafe..542685cf1 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -91,7 +91,7 @@ contains end if do while(.true.) - mid = (l+r)/2_ip ! Deliberate integer division + mid = l + (r-l)/2_ip ! Deliberate integer division call medianOf3(a, l, mid, r) call swap(a(l), a(mid)) @@ -207,7 +207,7 @@ contains end if do while(.true.) - mid = (l+r)/2_ip ! Deliberate integer division + mid = l + (r-l)/2_ip ! Deliberate integer division call arg_medianOf3(a, indx, l, mid, r) call swap(indx(l), indx(mid)) From 52f869441402ea9d5a3f8a3ac6934a85715fca2b Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 25 Nov 2021 20:36:13 +1100 Subject: [PATCH 47/51] add test that can catch overflow with naive mid index calculation --- src/stdlib_selection.fypp | 4 +-- src/tests/selection/test_selection.fypp | 45 ++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 7 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 542685cf1..f94f2cf1d 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -91,7 +91,7 @@ contains end if do while(.true.) - mid = l + (r-l)/2_ip ! Deliberate integer division + mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call medianOf3(a, l, mid, r) call swap(a(l), a(mid)) @@ -207,7 +207,7 @@ contains end if do while(.true.) - mid = l + (r-l)/2_ip ! Deliberate integer division + mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call arg_medianOf3(a, indx, l, mid, r) call swap(indx(l), indx(mid)) diff --git a/src/tests/selection/test_selection.fypp b/src/tests/selection/test_selection.fypp index 5440d8ab0..5c3932386 100644 --- a/src/tests/selection/test_selection.fypp +++ b/src/tests/selection/test_selection.fypp @@ -47,15 +47,18 @@ contains subroutine ${name}$(error) type(error_type), allocatable, intent(out) :: error - ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 - ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) + integer, parameter :: ip = ${intkind}$ + ${inttype}$, parameter :: N = 10, Nm = 8 + ${inttype}$, parameter :: near_huge = HUGE(N) - 1_ip ! Segfaults without the -1_ip + ${inttype}$, parameter :: Nreps = 2 ! Number of repetitions of random sampling + ${inttype}$, parameter :: Nr = 25_ip ! Size of random array, must be < HUGE(N) ${arraytype}$ :: x(N), x_copy(N), mat(Nm), mat_copy(Nm), len1(1), len2(2), & kth_smallest, random_vals(Nr), one = 1 ${inttype}$ :: i, p, up_rank, down_rank, mid_rank real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases logical :: test1, test2, test3 - integer, parameter :: ip = ${intkind}$ + ${arraytype}$, allocatable :: long_array(:) ! x contains the numbers 1**2, 2**2, .... 10**2, with mixed-up order x = (/( i**2, i=1, size(x, kind=ip) )/) @@ -88,6 +91,19 @@ contains if(allocated(error)) return end do + ! The test below can catch overflow in naive calculation of the middle index, like discussed here: + ! https://ai.googleblog.com/2006/06/extra-extra-read-all-about-it-nearly.html + ! But don't do it if near_huge is large, to avoid allocating a big array and slowing the tests + if(near_huge < 200) then + allocate(long_array(near_huge)) + long_array = 0 * one + long_array(1:3) = one + call select(long_array, near_huge - 2_ip, kth_smallest) + call check( error, (kth_smallest == one), " ${name}$: designed to catch overflow in middle index") + if(allocated(error)) return + deallocate(long_array) + end if + ! Simple tests mat = one * [3, 2, 7, 4, 5, 1, 4, -1] mat_copy = mat @@ -213,9 +229,11 @@ contains subroutine ${name}$(error) type(error_type), allocatable, intent(out) :: error - ${inttype}$, parameter :: N = 10, Nreps = 2, Nm = 8 - ${inttype}$, parameter :: Nr = min(HUGE(N)/2_int64, 25_int64) ! < HUGE(N) integer, parameter :: ip = ${intkind}$ + ${inttype}$, parameter :: N = 10, Nm = 8 + ${inttype}$, parameter :: near_huge = HUGE(N) - 1_ip ! Segfaults without the -1_ip + ${inttype}$, parameter :: Nreps = 2 ! Number of repetitions of random sampling + ${inttype}$, parameter :: Nr = 25_ip ! Size of random array, must be < HUGE(N) ${arraytype}$ :: x(N), mat(Nm), len1(1), len2(2), random_vals(Nr), one=1 @@ -224,6 +242,8 @@ contains real(dp) :: random_doubles(Nr) ! Deliberately double precision for all cases integer(ip) :: i, j, p, up_rank, down_rank, mid_rank, kth_smallest logical :: test1, test2, test3 + ${arraytype}$, allocatable :: long_array(:) + ${inttype}$, allocatable :: long_array_index(:) ! Make x contain 1**2, 2**2, .... 10**2, but mix up the order x = (/( i**2, i=1, size(x, kind=ip) )/) @@ -258,6 +278,21 @@ contains if(allocated(error)) return end do + ! The test below would catch overflow in naive calculation of the middle index, like discussed here: + ! https://ai.googleblog.com/2006/06/extra-extra-read-all-about-it-nearly.html + ! But don't do it if near_huge is large, to avoid allocating a big array and slowing the tests + if(near_huge < 200) then + allocate(long_array(near_huge)) + allocate(long_array_index(near_huge)) + long_array = 0 * one + long_array(1:3) = one + long_array_index = (/( i, i = 1_ip, size(long_array, kind=ip) )/) + call arg_select(long_array, long_array_index, near_huge - 2_ip, kth_smallest) + call check( error, (kth_smallest < 4), " ${name}$: designed to catch overflow in middle index") + if(allocated(error)) return + deallocate(long_array, long_array_index) + end if + ! Simple tests mat = one * [3, 2, 7, 4, 5, 1, 4, -1] indx_mat = (/( i, i = 1, size(mat, kind=ip) )/) From 4b0d880e8874040762e23f5de356a0572ddd37dc Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 25 Nov 2021 20:49:24 +1100 Subject: [PATCH 48/51] remove redundant while(.true.) --- src/stdlib_selection.fypp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index f94f2cf1d..084a77750 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -90,7 +90,7 @@ contains error stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - do while(.true.) + do mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call medianOf3(a, l, mid, r) @@ -206,7 +206,7 @@ contains error stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - do while(.true.) + do mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call arg_medianOf3(a, indx, l, mid, r) From 817ee309a35f6ff97c2ca9ebc7ab49dfcff0d8df Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 25 Nov 2021 21:08:28 +1100 Subject: [PATCH 49/51] add comment that arg_sort is not a stable sort --- doc/specs/stdlib_selection.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index 15cddd467..acee13816 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -187,6 +187,9 @@ search. `arg_select` does not modify `array`, unlike `select`. +The partial sorting of `indx` is not stable, i.e., indices that map to equal +values of array may be reordered. + While it is essential that that `indx` contains a permutation of the integers `1:size(array)`, the code does not check for this. For example if `size(array) == 4`, then we could have `indx = [4, 2, 1, 3]` or `indx = [1, 2, 3, 4]`, but not `indx = [2, 1, 2, 4]`. It is the user's From f48614893ea56ac99c646d1cea188e8538dda430 Mon Sep 17 00:00:00 2001 From: gareth Date: Thu, 25 Nov 2021 21:17:06 +1100 Subject: [PATCH 50/51] add statement that NaN entries in array are not supported --- doc/specs/stdlib_selection.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/specs/stdlib_selection.md b/doc/specs/stdlib_selection.md index acee13816..b6694047c 100644 --- a/doc/specs/stdlib_selection.md +++ b/doc/specs/stdlib_selection.md @@ -95,6 +95,10 @@ Selection of a single value should have runtime of O(`size(array)`), so it is asymptotically faster than sorting `array` entirely. The test program at the end of this document shows that is the case. +The code does not support `NaN` elements in `array`; it will run, but there is +no consistent interpretation given to the order of `NaN` entries of `array` +compared to other entries. + `select` was derived from code in the Coretran library by Leon Foks, https://github.com/leonfoks/coretran. Leon Foks has given permission for the code here to be released under stdlib's MIT license. @@ -190,6 +194,10 @@ search. The partial sorting of `indx` is not stable, i.e., indices that map to equal values of array may be reordered. +The code does not support `NaN` elements in `array`; it will run, but there is +no consistent interpretation given to the order of `NaN` entries of `array` +compared to other entries. + While it is essential that that `indx` contains a permutation of the integers `1:size(array)`, the code does not check for this. For example if `size(array) == 4`, then we could have `indx = [4, 2, 1, 3]` or `indx = [1, 2, 3, 4]`, but not `indx = [2, 1, 2, 4]`. It is the user's From 0f4e635433fb9e53c36642bf07329763883444eb Mon Sep 17 00:00:00 2001 From: gareth Date: Sun, 28 Nov 2021 12:06:05 +1100 Subject: [PATCH 51/51] add loop labels --- src/stdlib_selection.fypp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/stdlib_selection.fypp b/src/stdlib_selection.fypp index 084a77750..4a963c5b8 100644 --- a/src/stdlib_selection.fypp +++ b/src/stdlib_selection.fypp @@ -90,7 +90,7 @@ contains error stop "select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - do + searchk: do mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call medianOf3(a, l, mid, r) @@ -105,7 +105,7 @@ contains kth_smallest = a(k) return end if - end do + end do searchk contains pure subroutine swap(a, b) @@ -136,10 +136,10 @@ contains do while (array(hi) > pivot) hi=hi-1_ip end do - do while (lo <= hi ) - if(array(lo) > pivot) exit + inner_lohi: do while (lo <= hi ) + if(array(lo) > pivot) exit inner_lohi lo=lo+1_ip - end do + end do inner_lohi if (lo <= hi) then call swap(array(lo),array(hi)) lo=lo+1_ip @@ -206,7 +206,7 @@ contains error stop "arg_select must have 1 <= k <= size(a), and 1 <= left <= right <= size(a)"; end if - do + searchk: do mid = l + ((r-l)/2_ip) ! Avoid (l+r)/2 which can cause overflow call arg_medianOf3(a, indx, l, mid, r) @@ -221,7 +221,7 @@ contains kth_smallest = indx(k) return end if - end do + end do searchk contains pure subroutine swap(a, b) @@ -254,10 +254,10 @@ contains do while (array(indx(hi)) > pivot) hi=hi-1_ip end do - do while (lo <= hi ) - if(array(indx(lo)) > pivot) exit + inner_lohi: do while (lo <= hi ) + if(array(indx(lo)) > pivot) exit inner_lohi lo=lo+1_ip - end do + end do inner_lohi if (lo <= hi) then call swap(indx(lo),indx(hi)) lo=lo+1_ip