Add Heaps algorithm which crawls over all permutations #14

kalmarek · 2018-06-26T06:14:27Z

The Heap's algorithm which crawls over all permutations seem a good testcase, cause You can not be "smart" and use language specific optimisations, it's just pure operations on integer arrays (indexing, changing content of);

function allperms(N)
    elts = collect(1:N)
    c = ones(Int, N)
    countfirst = 0
    n = 1
    k = 0
#    doit(elts)
    countfirst += elts[1]
    @inbounds while n <= N
      if c[n] < n
         k = ifelse(isodd(n), 1, c[n])
         elts[k], elts[n] = elts[n], elts[k]
         c[n] += 1
         n = 1
         countfirst += elts[1]
#         doit(elts)
      else
        c[n] = 1
        n += 1
      end
   end
   return countfirst
end

a similar implementation in c:

void swap(int *p, int a, int b){
   int tmp;
   tmp = p[a];
   p[a] = p[b];
   p[b] = tmp;
}

long heaps(int N){
   long count = 0;
   int n;

   int *p = malloc(sizeof(int)*N);
   int *c = calloc(N, sizeof(int));
   for (n=0; n < N; n++){
      p[n] = n+1;
      // c[n] = 0;
   }

   count += p[0];

   for (n=0; n < N;){
      if (c[n] < n){
         swap(p, (n%2 ? c[n] : 0), n);

         count += p[0];

         c[n]++;
         n = 0;
      } else {
         c[n] = 0;
         n++;
      }
   }
   free(p);
   free(c);
   return count;
}

EDIT: @inbounds macro makes a fairer comparison to C (and speeds julias version!)

The text was updated successfully, but these errors were encountered:

StefanKarpinski · 2018-06-26T12:54:48Z

That does seem like a good one!

Enet4 · 2018-06-26T15:47:26Z

Here's my Rust attempt. This sure feels hard to optimize.

Stargateur · 2018-06-27T01:08:57Z

#include <stdlib.h>
#include <stdio.h>

void swap(size_t *p, size_t a, size_t b) {
  size_t tmp = p[a];
  p[a] = p[b];
  p[b] = tmp;
}

size_t heaps(size_t n) {
  size_t *a = malloc(n * sizeof *a);
  if (!a) {
    exit(EXIT_FAILURE);
  }
  for (size_t i = 0; i < n; i++) {
    a[i] = i; // i + 1 was strange
  }
    
  size_t *c = calloc(n, sizeof *c);
  if (!c) {
    exit(EXIT_FAILURE);
  }

  size_t count = a[0];

  size_t i = 0;
  while (i < n) {
    if (c[i] < i) {
      swap(a, i % 2 ? c[i] : 0, i);

      count += a[0];

      c[i]++;
      i = 0;
    } else {
      c[i] = 0;
      i++;
    }
  }

  free(a);
  free(c);

  return count;
}

johnfgibson · 2018-07-03T16:24:24Z

I think there are some issues with these codes

heaps(n), n=1:4	code
1, 3, 12, 60	kalmarek Julia
0, 3, 12, 60	karmarek C
0, 1, 6, 36	Stargateur C
1, 2, 6, 24	n!

Stargateur · 2018-07-03T16:28:09Z

@johnfgibson Sorry, I should make it more clear that I change the initialisation in favor of the rust:

p[n] = n+1; // n + 1 could overflow n is as good as n + 1 and can't overflow
let mut perm: Vec<_> = (0..n).collect(); // rust use n
a[i] = i; // so I just used n too

kalmarek · 2018-07-03T16:58:42Z

@johnfgibson, @Stargateur: for comparison with n-factorial: we don't simply count the permutations, but sum the first entry of every permutation (otherwise future compilers may skip the swap part entirely);
p[n] = n+1 was because permutations are usually counted from one, not zero. This does matter for the returned value.

Could You also recheck n=1? my C version returns squarely 1 for N=1 ;-)

johnfgibson · 2018-07-03T17:10:12Z

@kalmarek aha! thank you. (regarding returning the sum of the first entry).

I ran what's currently posted above and got heaps(1:5) == 1, 3, 12, 60, 360. There were some differences from an earlier version (count += p[0] versus count += p[1]) that probably account for the difference.

@Stargateur I don't follow from your comment what changes should be made.

I can add working codes to the microbenchmark suite but I don't think I can debug and fix codes with errors.

Stargateur · 2018-07-03T17:23:59Z

@kalmarek @johnfgibson I'm not a mathematician, if you conclude that function should start init with 1, I'm ok with it. But I didn't think that the result was so important, just as you said it's for avoid compiler to optimize it. I'm just a C dev who read this thread.

This is better ?

#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>

static void swap(uintmax_t *p, size_t a, size_t b) {
  uintmax_t tmp = p[a];
  p[a] = p[b];
  p[b] = tmp;
}

// UB for n == 0
static uintmax_t heaps(size_t n) {
  uintmax_t *a = malloc(n * sizeof *a);
  if (!a) {
    exit(EXIT_FAILURE);
  }
  uintmax_t acu = 1;
  for (size_t i = 0; i < n; i++) {
    a[i] = acu++;
  }

  size_t *c = calloc(n, sizeof *c);
  if (!c) {
    exit(EXIT_FAILURE);
  }

  assert(n != 0); // remove when NDEBUG is defined
  uintmax_t count = a[0];

  size_t i = 0;
  while (i < n) {
    if (c[i] < i) {
      swap(a, i % 2 ? c[i] : 0, i);

      count += a[0];

      c[i]++;
      i = 0;
    } else {
      c[i] = 0;
      i++;
    }
  }

  free(a);
  free(c);

  return count;
}

int main(void) {
  for (size_t i = 1; i < 5; i++) {
    printf("heaps(%zu) = %" PRIuMAX "\n", i, heaps(i));
  }
}

kalmarek · 2018-07-04T08:54:15Z

@Stargateur, sure the exact value does not matter, it was just to explain the differences in the results. If I am not mistaken, You could start from a[0] = 0, but then You need to sum a[1]. Anyway, the important thing is that we swap during each iteration.

BTW How do I call Your code from julia? if I use:

C_code = """
    [Your C code here]
"""
const Clib = tempname()

open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
    print(f, C_code) 
end

heaps_c(n::Int) = ccall(("heaps", Clib), Int, (Int,), n)

I get ccall: could not find function heaps in library /tmp/juliaACJ6nY, but this works for my, primitive ;-) C code

Stargateur · 2018-07-04T09:45:26Z

Remove static from static uintmax_t heaps(size_t n), it's make the symbol not global.

C_code = """
#include <stdlib.h>
#include <stdio.h>
#include <inttypes.h>
#include <assert.h>

static void swap(uintmax_t *p, size_t a, size_t b) {
  uintmax_t tmp = p[a];
  p[a] = p[b];
  p[b] = tmp;
}

// UB for n == 0
uintmax_t heaps(size_t n) {
  uintmax_t *a = malloc(n * sizeof *a);
  if (!a) {
    exit(EXIT_FAILURE);
  }
  uintmax_t acu = 1;
  for (size_t i = 0; i < n; i++) {
    a[i] = acu++;
  }

  size_t *c = calloc(n, sizeof *c);
  if (!c) {
    exit(EXIT_FAILURE);
  }

  assert(n != 0); // remove when NDEBUG is defined
  uintmax_t count = a[0];

  size_t i = 0;
  while (i < n) {
    if (c[i] < i) {
      swap(a, i % 2 ? c[i] : 0, i);

      count += a[0];

      c[i]++;
      i = 0;
    } else {
      c[i] = 0;
      i++;
    }
  }

  free(a);
  free(c);

  return count;
}
"""
const Clib = tempname()

open(`gcc -fPIC -O3 -xc -shared -o $(Clib * "." * Libdl.dlext) -`, "w") do f
    print(f, C_code) 
end

ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n)

Enet4 · 2018-07-05T17:14:36Z

I updated the Rust code above as well, thanks to your feedback. It was initializing p from 0 rather than 1.

kalmarek · 2018-07-15T15:25:54Z

a naive python version

def heaps(n):
    elts = [i+1 for i in range(n)]
    c = [0 for i in range(n)]
    n = 0
    countfirst = elts[0]
    while n < len(elts):
        if c[n] < n:
            if n % 2:
                k = c[n]
            else:
                k = 0
            elts[k], elts[n] = elts[n], elts[k]
            
#             print(elts)
            countfirst += elts[0]
            
            c[n] += 1
            n = 0
        else:
            c[n] = 0
            n += 1
    return countfirst

To be honest, I don't know any other language microbenchmark compares julia against, so someone else has to come-up with the code.

kalmarek · 2018-08-08T15:37:18Z

defining heaps_c(n) = Int(ccall(("heaps", Clib), Cuintmax_t , (Csize_t,), n)) I get:

On julia-0.6.4:

julia> @btime allperms(12)
  2.862 s (3 allocations: 384 bytes)
3113510400

julia> @btime heaps_c(12)
  1.470 s (0 allocations: 0 bytes)
3113510400

On julia-0.7.0:

julia> @btime allperms(12)
  1.397 s (2 allocations: 352 bytes)
3113510400

julia> @btime heaps_c(12)
  1.466 s (0 allocations: 0 bytes)
3113510400

I'm impressed!

kalmarek mentioned this issue May 14, 2020

Slow permutations JuliaMath/Combinatorics.jl#95

Closed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add Heaps algorithm which crawls over all permutations #14

Add Heaps algorithm which crawls over all permutations #14

kalmarek commented Jun 26, 2018 •

edited

Loading

StefanKarpinski commented Jun 26, 2018

Enet4 commented Jun 26, 2018 •

edited

Loading

Stargateur commented Jun 27, 2018 •

edited

Loading

johnfgibson commented Jul 3, 2018

Stargateur commented Jul 3, 2018

kalmarek commented Jul 3, 2018

johnfgibson commented Jul 3, 2018

Stargateur commented Jul 3, 2018 •

edited

Loading

kalmarek commented Jul 4, 2018

Stargateur commented Jul 4, 2018

Enet4 commented Jul 5, 2018

kalmarek commented Jul 15, 2018

kalmarek commented Aug 8, 2018

Add Heaps algorithm which crawls over all permutations #14

Add Heaps algorithm which crawls over all permutations #14

Comments

kalmarek commented Jun 26, 2018 • edited Loading

StefanKarpinski commented Jun 26, 2018

Enet4 commented Jun 26, 2018 • edited Loading

Stargateur commented Jun 27, 2018 • edited Loading

johnfgibson commented Jul 3, 2018

Stargateur commented Jul 3, 2018

kalmarek commented Jul 3, 2018

johnfgibson commented Jul 3, 2018

Stargateur commented Jul 3, 2018 • edited Loading

kalmarek commented Jul 4, 2018

Stargateur commented Jul 4, 2018

Enet4 commented Jul 5, 2018

kalmarek commented Jul 15, 2018

kalmarek commented Aug 8, 2018

kalmarek commented Jun 26, 2018 •

edited

Loading

Enet4 commented Jun 26, 2018 •

edited

Loading

Stargateur commented Jun 27, 2018 •

edited

Loading

Stargateur commented Jul 3, 2018 •

edited

Loading