diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..7591e5a --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,46 @@ +# Contributor Covenant Code of Conduct + +## Our Pledge + +In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. + +## Our Standards + +Examples of behavior that contributes to creating a positive environment include: + +* Using welcoming and inclusive language +* Being respectful of differing viewpoints and experiences +* Gracefully accepting constructive criticism +* Focusing on what is best for the community +* Showing empathy towards other community members + +Examples of unacceptable behavior by participants include: + +* The use of sexualized language or imagery and unwelcome sexual attention or advances +* Trolling, insulting/derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or electronic address, without explicit permission +* Other conduct which could reasonably be considered inappropriate in a professional setting + +## Our Responsibilities + +Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. + +Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. + +## Scope + +This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at maxhalford25@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. + +Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] + +[homepage]: http://contributor-covenant.org +[version]: http://contributor-covenant.org/version/1/4/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..fd28eb4 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,41 @@ +## Ideas + +- Improve tournament selection testing +- Refactor models testings +- Add more context to errors (at least the method/struct name) (https://dave.cheney.net/2016/04/27/dont-just-check-errors-handle-them-gracefully) +- Implement operators described in http://www.ppgia.pucpr.br/~alceu/mestrado/aula3/IJBB-41.pdf +- Implement Particle Swarm Optimization +- http://deap.readthedocs.io/en/master/ +- http://pyevolve.sourceforge.net/intro.html#ga-features +- http://www.dmi.unict.it/mpavone/nc-cs/materiale/moscato89.pdf + +## Code style + +### Guidelines + +- Keep names short +- Aim for 100 characters per line + +### Variable declaration + +```go +// Bad +x := 42 + +// Good +var x = 42 +``` + +## Naming convention + +Please inspire yourself from the existing algorithms before implementing, the naming conventions are easy to grasp. + + +## Parallelism and random number generation caveat + +Genetic algorithms are notorious for being [embarrassingly parallel](http://www.wikiwand.com/en/Embarrassingly_parallel). Indeed, most calculations can be run in parallel because they only affect part of the GA. Luckily Go provides good support for parallelism. As some gophers may have encountered, the `math/rand` module can be problematic because there is a global lock attached to the random number generator. The problem is described in this [StackOverflow post](http://stackoverflow.com/questions/14298523/why-does-adding-concurrency-slow-down-this-golang-code). This can be circumvented by providing each population with it's own random number generator. + +## Performance + +1. `go test -bench . -cpuprofile=cpu.prof` +2. `go tool pprof -pdf gago.test cpu.prof > profile.pdf` diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0ad811f --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +The MIT License (MIT) + +Copyright (c) 2016 Max Halford + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 08c07fd..879e372 100644 --- a/README.md +++ b/README.md @@ -1,51 +1,4 @@ -
- - logo -
- -
- - - awesome_go - - - - awesome_ml - -
- -
- -
- - - godoc - - - - build_status - - - - test_coverage - - - - go_report_card - - - - Code Climate - - - - license - -
- -
- -
An extensible toolkit for conceiving and running genetic algorithms
+:warning: This is gago, the old version of [eaopt](https://github.com/MaxHalford/eaopt). The repository only exists so that existing projects importing gago won't break. Please use [eaopt](https://github.com/MaxHalford/eaopt) if you're starting a new project or switch to it if your project is using it.
@@ -75,8 +28,8 @@ The following example attempts to minimize the [Drop-Wave function](https://www.sfu.ca/~ssurjano/drop.html) which is known to have a minimum value of -1.
- drop_wave_chart - drop_wave_function + drop_wave_chart + drop_wave_function
```go @@ -155,14 +108,14 @@ func main() { **More examples** -All the examples can be found [here](https://github.com/MaxHalford/gago-examples). +All the examples can be found [here](https://github.com/MaxHalford/eaopt-examples). -- [Cross-in-Tray (speciation)](https://github.com/MaxHalford/gago-examples/tree/master/cross_in_tray) -- [Grid TSP](https://github.com/MaxHalford/gago-examples/tree/master/tsp_grid) -- [Including a constraint](https://github.com/MaxHalford/gago-examples/tree/master/constraint) -- [One Max problem](https://github.com/MaxHalford/gago-examples/tree/master/one_max) -- [N-queens problem](https://github.com/MaxHalford/gago-examples/tree/master/n_queens) -- [String matching](https://github.com/MaxHalford/gago-examples/tree/master/string_matching) +- [Cross-in-Tray (speciation)](https://github.com/MaxHalford/eaopt-examples/tree/master/cross_in_tray) +- [Grid TSP](https://github.com/MaxHalford/eaopt-examples/tree/master/tsp_grid) +- [Including a constraint](https://github.com/MaxHalford/eaopt-examples/tree/master/constraint) +- [One Max problem](https://github.com/MaxHalford/eaopt-examples/tree/master/one_max) +- [N-queens problem](https://github.com/MaxHalford/eaopt-examples/tree/master/n_queens) +- [String matching](https://github.com/MaxHalford/eaopt-examples/tree/master/string_matching) ## Background @@ -256,7 +209,7 @@ type Slice interface { } ``` -Internally `IntSlice`, `Float64Slice` and `StringSlice` implement this interface so that you can use the available operators for most use cases. If however you wish to use the operators which slices of a different type you will have to implement the `Slice` interface. Although there are many methods to implement, they are all trivial (have a look at [`slice.go`](slice.go) and the [TSP example](https://github.com/MaxHalford/gago-examples/tree/master/tsp_grid). +Internally `IntSlice`, `Float64Slice` and `StringSlice` implement this interface so that you can use the available operators for most use cases. If however you wish to use the operators which slices of a different type you will have to implement the `Slice` interface. Although there are many methods to implement, they are all trivial (have a look at [`slice.go`](slice.go) and the [TSP example](https://github.com/MaxHalford/eaopt-examples/tree/master/tsp_grid). ### Instantiating a GA struct diff --git a/benchmark_test.go b/benchmark_test.go new file mode 100644 index 0000000..e8e4a2b --- /dev/null +++ b/benchmark_test.go @@ -0,0 +1,78 @@ +package gago + +import ( + "runtime" + "testing" +) + +func BenchmarkEvolve1Pop(b *testing.B) { + ga = GA{ + NewGenome: NewVector, + NPops: 1, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + } + ga.Initialize() + for i := 0; i < b.N; i++ { + ga.Evolve() + } +} + +func BenchmarkEvolve2Pops(b *testing.B) { + runtime.GOMAXPROCS(runtime.NumCPU()) + ga = GA{ + NewGenome: NewVector, + NPops: 2, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + } + ga.Initialize() + for i := 0; i < b.N; i++ { + ga.Evolve() + } +} + +func BenchmarkEvolve3Pops(b *testing.B) { + ga = GA{ + NewGenome: NewVector, + NPops: 3, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + } + ga.Initialize() + for i := 0; i < b.N; i++ { + ga.Evolve() + } +} +func BenchmarkEvolve4Pops(b *testing.B) { + ga = GA{ + NewGenome: NewVector, + NPops: 4, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + } + ga.Initialize() + for i := 0; i < b.N; i++ { + ga.Evolve() + } +} diff --git a/crossover.go b/crossover.go new file mode 100644 index 0000000..7571209 --- /dev/null +++ b/crossover.go @@ -0,0 +1,325 @@ +package gago + +import ( + "math/rand" + "sort" +) + +// Type specific mutations for slices + +// CrossUniformFloat64 crossover combines two individuals (the parents) into one +// (the offspring). Each parent's contribution to the Genome is determined by +// the value of a probability p. Each offspring receives a proportion of both of +// it's parents genomes. The new values are located in the hyper-rectangle +// defined between both parent's position in Cartesian space. +func CrossUniformFloat64(p1 []float64, p2 []float64, rng *rand.Rand) { + for i := range p1 { + var p = rng.Float64() + p1[i] = p*p1[i] + (1-p)*p2[i] + p2[i] = (1-p)*p1[i] + p*p2[i] + } +} + +// Generic mutations for slices + +// Contains the deterministic part of the GNX method for testing purposes. +func gnx(p1, p2 Slice, indexes []int) { + var ( + n = p1.Len() + o1 = p1.Copy() + o2 = p2.Copy() + toggle = true + ) + // Add the first and last indexes + indexes = append([]int{0}, indexes...) + indexes = append(indexes, n) + for i := 0; i < len(indexes)-1; i++ { + if toggle { + o1.Slice(indexes[i], indexes[i+1]).Replace(p1.Slice(indexes[i], indexes[i+1])) + o2.Slice(indexes[i], indexes[i+1]).Replace(p2.Slice(indexes[i], indexes[i+1])) + } else { + o1.Slice(indexes[i], indexes[i+1]).Replace(p2.Slice(indexes[i], indexes[i+1])) + o2.Slice(indexes[i], indexes[i+1]).Replace(p1.Slice(indexes[i], indexes[i+1])) + } + toggle = !toggle // Alternate for the new copying + } + p1.Replace(o1) + p2.Replace(o2) +} + +// CrossGNX (Generalized N-point Crossover). An identical point is chosen on +// each parent's genome and the mirroring segments are switched. n determines +// the number of crossovers (aka mirroring segments) to perform. n has to be +// equal or lower than the number of genes in each parent. +func CrossGNX(p1 Slice, p2 Slice, n int, rng *rand.Rand) { + var indexes = randomInts(n, 1, p1.Len(), rng) + sort.Ints(indexes) + gnx(p1, p2, indexes) +} + +// CrossGNXInt calls CrossGNX on two int slices. +func CrossGNXInt(s1 []int, s2 []int, n int, rng *rand.Rand) { + CrossGNX(IntSlice(s1), IntSlice(s2), n, rng) +} + +// CrossGNXFloat64 calls CrossGNX on two float64 slices. +func CrossGNXFloat64(s1 []float64, s2 []float64, n int, rng *rand.Rand) { + CrossGNX(Float64Slice(s1), Float64Slice(s2), n, rng) +} + +// CrossGNXString calls CrossGNX on two string slices. +func CrossGNXString(s1 []string, s2 []string, n int, rng *rand.Rand) { + CrossGNX(StringSlice(s1), StringSlice(s2), n, rng) +} + +// Contains the deterministic part of the PMX method for testing purposes. +func pmx(p1, p2 Slice, a, b int) { + var ( + n = p1.Len() + o1 = p1.Copy() + o2 = p2.Copy() + ) + // Create lookup maps to quickly see if a gene has been visited + var ( + p1Visited, p2Visited = make(set), make(set) + o1Visited, o2Visited = make(set), make(set) + ) + for i := a; i < b; i++ { + p1Visited[p1.At(i)] = true + p2Visited[p2.At(i)] = true + o1Visited[i] = true + o2Visited[i] = true + } + for i := a; i < b; i++ { + // Find the element in the second parent that has not been copied in the first offspring + if !p1Visited[p2.At(i)] { + var j = i + for o1Visited[j] { + j, _ = search(o1.At(j), p2) + } + o1.Set(j, p2.At(i)) + o1Visited[j] = true + } + // Find the element in the first parent that has not been copied in the second offspring + if !p2Visited[p1.At(i)] { + var j = i + for o2Visited[j] { + j, _ = search(o2.At(j), p1) + } + o2.Set(j, p1.At(i)) + o2Visited[j] = true + } + } + // Fill in the offspring's missing values with the opposite parent's values + for i := 0; i < n; i++ { + if !o1Visited[i] { + o1.Set(i, p2.At(i)) + } + if !o2Visited[i] { + o2.Set(i, p1.At(i)) + } + } + p1.Replace(o1) + p2.Replace(o2) +} + +// CrossPMX (Partially Mapped Crossover). The offsprings are generated by +// copying one of the parents and then copying the other parent's values up to a +// randomly chosen crossover point. Each gene that is replaced is permuted with +// the gene that is copied in the first parent's genome. Two offsprings are +// generated in such a way (because there are two parents). The PMX method +// preserves gene uniqueness. +func CrossPMX(p1 Slice, p2 Slice, rng *rand.Rand) { + var indexes = randomInts(2, 1, p1.Len(), rng) + sort.Ints(indexes) + pmx(p1, p2, indexes[0], indexes[1]) +} + +// CrossPMXInt calls CrossPMX on an int slice. +func CrossPMXInt(s1 []int, s2 []int, rng *rand.Rand) { + CrossPMX(IntSlice(s1), IntSlice(s2), rng) +} + +// CrossPMXFloat64 calls CrossPMX on a float64 slice. +func CrossPMXFloat64(s1 []float64, s2 []float64, rng *rand.Rand) { + CrossPMX(Float64Slice(s1), Float64Slice(s2), rng) +} + +// CrossPMXString calls CrossPMX on a string slice. +func CrossPMXString(s1 []string, s2 []string, rng *rand.Rand) { + CrossPMX(StringSlice(s1), StringSlice(s2), rng) +} + +// Contains the deterministic part of the OX method for testing purposes. +func ox(p1, p2 Slice, a, b int) { + var ( + n = p1.Len() + o1 = p1.Copy() + o2 = p2.Copy() + ) + // Create lookup maps to quickly see if a gene has been copied from a parent or not + var p1Visited, p2Visited = make(set), make(set) + for i := a; i < b; i++ { + p1Visited[p1.At(i)] = true + p2Visited[p2.At(i)] = true + } + // Keep two indicators to know where to fill the offsprings + var j1, j2 = b, b + for i := b; i < b+n; i++ { + var k = i % n + if !p1Visited[p2.At(k)] { + o1.Set(j1%n, p2.At(k)) + j1++ + } + if !p2Visited[p1.At(k)] { + o2.Set(j2%n, p1.At(k)) + j2++ + } + } + p1.Replace(o1) + p2.Replace(o2) +} + +// CrossOX (Ordered Crossover). Part of the first parent's genome is copied onto +// the first offspring's genome. Then the second parent's genome is iterated +// over, starting on the right of the part that was copied. Each gene of the +// second parent's genome is copied onto the next blank gene of the first +// offspring's genome if it wasn't already copied from the first parent. The OX +// method preserves gene uniqueness. +func CrossOX(p1 Slice, p2 Slice, rng *rand.Rand) { + var indexes = randomInts(2, 1, p1.Len(), rng) + sort.Ints(indexes) + ox(p1, p2, indexes[0], indexes[1]) +} + +// CrossOXInt calls CrossOX on a int slice. +func CrossOXInt(s1 []int, s2 []int, rng *rand.Rand) { + CrossOX(IntSlice(s1), IntSlice(s2), rng) +} + +// CrossOXFloat64 calls CrossOX on a float64 slice. +func CrossOXFloat64(s1 []float64, s2 []float64, rng *rand.Rand) { + CrossOX(Float64Slice(s1), Float64Slice(s2), rng) +} + +// CrossOXString calls CrossOX on a string slice. +func CrossOXString(s1 []string, s2 []string, rng *rand.Rand) { + CrossOX(StringSlice(s1), StringSlice(s2), rng) +} + +// CrossCX (Cycle Crossover). Cycles between the parents are indentified, they +// are then copied alternatively onto the offsprings. The CX method is +// deterministic and preserves gene uniqueness. +func CrossCX(p1, p2 Slice) { + var ( + o1 = p1.Copy() + o2 = p2.Copy() + cycles = getCycles(p1, p2) + toggle = true + ) + for i := 0; i < len(cycles); i++ { + for _, j := range cycles[i] { + if toggle { + o1.Set(j, p1.At(j)) + o2.Set(j, p2.At(j)) + } else { + o2.Set(j, p1.At(j)) + o1.Set(j, p2.At(j)) + } + } + toggle = !toggle + } + p1.Replace(o1) + p2.Replace(o2) +} + +// CrossCXInt calls CrossCX on an int slice. +func CrossCXInt(s1 []int, s2 []int) { + CrossCX(IntSlice(s1), IntSlice(s2)) +} + +// CrossCXFloat64 calls CrossCX on a float64 slice. +func CrossCXFloat64(s1 []float64, s2 []float64) { + CrossCX(Float64Slice(s1), Float64Slice(s2)) +} + +// CrossCXString calls CrossCX on a string slice. +func CrossCXString(s1 []string, s2 []string) { + CrossCX(StringSlice(s1), StringSlice(s2)) +} + +// CrossERX (Edge Recombination Crossover). +func CrossERX(p1, p2 Slice) { + var ( + n = p1.Len() + o1 = p1.Copy() + o2 = p2.Copy() + parents = []Slice{p1, p2} + offsprings = []Slice{o1, o2} + p1Neighbours = getNeighbours(p1) + p2Neighbours = getNeighbours(p2) + pNeighbours = make(map[interface{}]set) + ) + // Merge the neighbours of each parent whilst ignoring duplicates + for i := range p1Neighbours { + pNeighbours[i] = union(p1Neighbours[i], p2Neighbours[i]) + } + // Hold two copies of the parent neighbours (one for each offspring) + var neighbours = []map[interface{}]set{pNeighbours, nil} + neighbours[1] = make(map[interface{}]set) + for k, v := range pNeighbours { + neighbours[1][k] = v + } + // Set the first element of each offspring to be the one of the + // corresponding parent + o1.Set(0, p1.At(0)) + o2.Set(0, p2.At(0)) + // Delete the neighbour from the adjacency set + for i := range neighbours { + delete(neighbours[i], parents[i].At(0)) + for j := range neighbours[i] { + if neighbours[i][j][parents[i].At(0)] { + delete(neighbours[i][j], parents[i].At(0)) + } + } + } + for o := range offsprings { + for i := 1; i < n; i++ { + // Find the gene with the least neighbours + var ( + j interface{} + min = 5 // There can't be more than 5 neighbours between 2 parents + ) + for k, v := range neighbours[o] { + if len(v) < min { + j = k + min = len(v) + } + } + offsprings[o].Set(i, j) + delete(neighbours[o], j) + for k := range neighbours[o] { + if neighbours[o][k][j] { + delete(neighbours[o][k], j) + } + } + } + } + p1.Replace(o1) + p2.Replace(o2) +} + +// CrossERXInt calls CrossERX on an int slice. +func CrossERXInt(s1 []int, s2 []int) { + CrossERX(IntSlice(s1), IntSlice(s2)) +} + +// CrossERXFloat64 callsCrossERX on a float64 slice. +func CrossERXFloat64(s1 []float64, s2 []float64) { + CrossERX(Float64Slice(s1), Float64Slice(s2)) +} + +// CrossERXString calls CrossERX on a string slice. +func CrossERXString(s1 []string, s2 []string) { + CrossERX(StringSlice(s1), StringSlice(s2)) +} diff --git a/crossover_test.go b/crossover_test.go new file mode 100644 index 0000000..b160587 --- /dev/null +++ b/crossover_test.go @@ -0,0 +1,513 @@ +package gago + +import ( + "fmt" + "math" + "reflect" + "testing" +) + +func TestCrossUniformFloat64(t *testing.T) { + var ( + rng = newRand() + p1 = NewVector(rng).(Vector) + p2 = NewVector(rng).(Vector) + o1 = p1.Clone().(Vector) + o2 = p2.Clone().(Vector) + ) + CrossUniformFloat64(o1, o2, rng) + // Check lengths + if len(o1) != len(p1) || len(o2) != len(p1) { + t.Error("CrossUniform should not produce offsprings with different sizes") + } + // Check values are different + if reflect.DeepEqual(o1, p1) || reflect.DeepEqual(o2, p2) { + t.Error("Offsprings and parents are not different") + } + // Check new values are contained in hyper-rectangle defined by parents + var ( + bounded = func(x, lower, upper float64) bool { return x > lower && x < upper } + lower float64 + upper float64 + ) + for i := 0; i < len(p1); i++ { + lower = math.Min(p1[i], p2[i]) + upper = math.Max(p1[i], p2[i]) + if !bounded(o1[i], lower, upper) || !bounded(o2[i], lower, upper) { + t.Error("New values are not contained in hyper-rectangle") + } + } +} + +func TestGNX(t *testing.T) { + var testCases = []struct { + p1 []int + p2 []int + indexes []int + o1 []int + o2 []int + }{ + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + indexes: []int{3, 7}, + o1: []int{1, 2, 3, 8, 2, 6, 5, 8, 9}, + o2: []int{9, 3, 7, 4, 5, 6, 7, 1, 4}, + }, + { + p1: []int{1, 2, 3}, + p2: []int{3, 2, 1}, + indexes: []int{0}, + o1: []int{3, 2, 1}, + o2: []int{1, 2, 3}, + }, + { + p1: []int{1, 2, 3}, + p2: []int{3, 2, 1}, + indexes: []int{1}, + o1: []int{1, 2, 1}, + o2: []int{3, 2, 3}, + }, + { + p1: []int{1, 2, 3}, + p2: []int{3, 2, 1}, + indexes: []int{2}, + o1: []int{1, 2, 1}, + o2: []int{3, 2, 3}, + }, + { + p1: []int{1, 2, 3}, + p2: []int{3, 2, 1}, + indexes: []int{3}, + o1: []int{1, 2, 3}, + o2: []int{3, 2, 1}, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + gnx(IntSlice(tc.p1), IntSlice(tc.p2), tc.indexes) + for i := range tc.p1 { + if tc.p1[i] != tc.o1[i] || tc.p2[i] != tc.o2[i] { + t.Error("Something went wrong during GNX crossover") + } + } + }) + } +} + +func TestCrossGNXFloat64(t *testing.T) { + var ( + rng = newRand() + p1 = []float64{1, 2, 3} + p2 = []float64{3, 2, 1} + o1 = []float64{1, 2, 1} + o2 = []float64{3, 2, 3} + ) + CrossGNXFloat64(p1, p2, 1, rng) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossGNXInt(t *testing.T) { + var ( + rng = newRand() + p1 = []int{1, 2, 3} + p2 = []int{3, 2, 1} + o1 = []int{1, 2, 1} + o2 = []int{3, 2, 3} + ) + CrossGNXInt(p1, p2, 1, rng) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossGNXString(t *testing.T) { + var ( + rng = newRand() + p1 = []string{"a", "b", "c"} + p2 = []string{"c", "b", "a"} + o1 = []string{"a", "b", "a"} + o2 = []string{"c", "b", "c"} + ) + CrossGNXString(p1, p2, 1, rng) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestPMX(t *testing.T) { + var testCases = []struct { + p1 []int + p2 []int + a int + b int + o1 []int + o2 []int + }{ + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 3, + b: 7, + o1: []int{9, 3, 2, 4, 5, 6, 7, 1, 8}, + o2: []int{1, 7, 3, 8, 2, 6, 5, 4, 9}, + }, + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 0, + b: 9, + o1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + o2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + }, + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 0, + b: 0, + o1: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + o2: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + pmx(IntSlice(tc.p1), IntSlice(tc.p2), tc.a, tc.b) + for i := range tc.p1 { + if tc.p1[i] != tc.o1[i] || tc.p2[i] != tc.o2[i] { + t.Error("Something went wrong during PMX crossover") + } + } + }) + } +} + +func TestCrossPMXFloat64(t *testing.T) { + var ( + rng = newRand() + p1 = []float64{1, 2, 3} + p2 = []float64{3, 2, 1} + o1 = []float64{1, 2, 3} + o2 = []float64{3, 2, 1} + ) + CrossPMXFloat64(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestCrossPMXInt(t *testing.T) { + var ( + rng = newRand() + p1 = []int{1, 2, 3} + p2 = []int{3, 2, 1} + o1 = []int{1, 2, 3} + o2 = []int{3, 2, 1} + ) + CrossPMXInt(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestCrossPMXString(t *testing.T) { + var ( + rng = newRand() + p1 = []string{"a", "b", "c"} + p2 = []string{"c", "b", "a"} + o1 = []string{"a", "b", "c"} + o2 = []string{"c", "b", "a"} + ) + CrossPMXString(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestOX(t *testing.T) { + var testCases = []struct { + p1 []int + p2 []int + a int + b int + o1 []int + o2 []int + }{ + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 3, + b: 7, + o1: []int{3, 8, 2, 4, 5, 6, 7, 1, 9}, + o2: []int{3, 4, 7, 8, 2, 6, 5, 9, 1}, + }, + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 0, + b: 9, + o1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + o2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + }, + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + a: 0, + b: 0, + o1: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + o2: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + ox(IntSlice(tc.p1), IntSlice(tc.p2), tc.a, tc.b) + for i := range tc.p1 { + if tc.p1[i] != tc.o1[i] || tc.p2[i] != tc.o2[i] { + t.Error("Something went wrong during OX crossover") + } + } + }) + } +} + +func TestCrossOXFloat64(t *testing.T) { + var ( + rng = newRand() + p1 = []float64{1, 2, 3} + p2 = []float64{3, 2, 1} + o1 = []float64{1, 2, 3} + o2 = []float64{3, 2, 1} + ) + CrossOXFloat64(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestCrossOXInt(t *testing.T) { + var ( + rng = newRand() + p1 = []int{1, 2, 3} + p2 = []int{3, 2, 1} + o1 = []int{1, 2, 3} + o2 = []int{3, 2, 1} + ) + CrossOXInt(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestCrossOXString(t *testing.T) { + var ( + rng = newRand() + p1 = []string{"a", "b", "c"} + p2 = []string{"c", "b", "a"} + o1 = []string{"a", "b", "c"} + o2 = []string{"c", "b", "a"} + ) + CrossOXString(p1, p2, rng) + // Check values + if reflect.DeepEqual(p1, o1) { + t.Error("Values should be different") + } + if reflect.DeepEqual(p2, o2) { + t.Error("Values should be different") + } +} + +func TestCrossCX(t *testing.T) { + var testCases = []struct { + p1 []int + p2 []int + o1 []int + o2 []int + }{ + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + o1: []int{1, 3, 7, 4, 2, 6, 5, 8, 9}, + o2: []int{9, 2, 3, 8, 5, 6, 7, 1, 4}, + }, + { + p1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + p2: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + o1: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + o2: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + CrossCX(IntSlice(tc.p1), IntSlice(tc.p2)) + for i := range tc.p1 { + if tc.p1[i] != tc.o1[i] || tc.p2[i] != tc.o2[i] { + t.Error("Something went wrong during PMX crossover") + } + } + }) + } +} + +func TestCrossCXFloat64(t *testing.T) { + var ( + p1 = []float64{1, 2, 3, 4} + p2 = []float64{4, 3, 2, 1} + o1 = []float64{1, 3, 2, 4} + o2 = []float64{4, 2, 3, 1} + ) + CrossCXFloat64(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossCXInt(t *testing.T) { + var ( + p1 = []int{1, 2, 3, 4} + p2 = []int{4, 3, 2, 1} + o1 = []int{1, 3, 2, 4} + o2 = []int{4, 2, 3, 1} + ) + CrossCXInt(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossCXString(t *testing.T) { + var ( + p1 = []string{"a", "b", "c", "d"} + p2 = []string{"d", "c", "b", "a"} + o1 = []string{"a", "c", "b", "d"} + o2 = []string{"d", "b", "c", "a"} + ) + CrossCXString(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossERX(t *testing.T) { + var testCases = []struct { + p1 []string + p2 []string + }{ + { + p1: []string{"A", "B", "F", "E", "D", "G", "C"}, + p2: []string{"G", "F", "A", "B", "C", "D", "E"}, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var ( + o1 = StringSlice(tc.p1).Copy() + o2 = StringSlice(tc.p2).Copy() + ) + CrossERX(o1, o2) + // Check offsprings have parent's first gene as first gene + if o1.At(0).(string) != tc.p1[0] || o2.At(0).(string) != tc.p2[0] { + t.Error("Something went wrong during ERX crossover") + } + // Check lengths + if o1.Len() != len(tc.p1) || o2.Len() != len(tc.p2) { + t.Error("Something went wrong during ERX crossover") + } + }) + } +} + +func TestCrossERXFloat64(t *testing.T) { + var ( + p1 = []float64{1, 2, 3} + p2 = []float64{2, 3, 1} + o1 = []float64{1, 3, 2} + o2 = []float64{2, 3, 1} + ) + CrossERXFloat64(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossERXInt(t *testing.T) { + var ( + p1 = []int{1, 2, 3} + p2 = []int{2, 3, 1} + o1 = []int{1, 3, 2} + o2 = []int{2, 3, 1} + ) + CrossERXInt(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} + +func TestCrossERXString(t *testing.T) { + var ( + p1 = []string{"a", "b", "c"} + p2 = []string{"b", "c", "a"} + o1 = []string{"a", "c", "b"} + o2 = []string{"b", "c", "a"} + ) + CrossERXString(p1, p2) + // Check values + if !reflect.DeepEqual(p1, o1) { + t.Errorf("Expected %v, got %v", o1, p1) + } + if !reflect.DeepEqual(p2, o2) { + t.Errorf("Expected %v, got %v", o2, p2) + } +} diff --git a/distance.go b/distance.go new file mode 100644 index 0000000..02b1c56 --- /dev/null +++ b/distance.go @@ -0,0 +1,128 @@ +package gago + +import ( + "fmt" + "math" +) + +// A Metric returns the distance between two genomes. +type Metric func(a, b Individual) float64 + +// A DistanceMemoizer computes and stores Metric calculations. +type DistanceMemoizer struct { + Metric Metric + Distances map[string]map[string]float64 + nCalculations int // Total number of calls to Metric for testing purposes +} + +// newDistanceMemoizer initializes a DistanceMemoizer. +func newDistanceMemoizer(metric Metric) DistanceMemoizer { + return DistanceMemoizer{ + Metric: metric, + Distances: make(map[string]map[string]float64), + } +} + +// GetDistance returns the distance between two Individuals based on the +// DistanceMemoizer's Metric field. If the two individuals share the same ID +// then GetDistance returns 0. DistanceMemoizer stores the calculated distances +// so that if GetDistance is called twice with the two same Individuals then +// the second call will return the stored distance instead of recomputing it. +func (dm *DistanceMemoizer) GetDistance(a, b Individual) float64 { + // Check if the two individuals are the same before proceding + if a.ID == b.ID { + return 0 + } + // Create maps if the genomes have never been encountered + if _, ok := dm.Distances[a.ID]; !ok { + dm.Distances[a.ID] = make(map[string]float64) + } else { + // Check if the distance between the two genomes has been calculated + if dist, ok := dm.Distances[a.ID][b.ID]; ok { + return dist + } + } + if _, ok := dm.Distances[b.ID]; !ok { + dm.Distances[b.ID] = make(map[string]float64) + } + // Calculate the distance between the genomes and memoize it + var dist = dm.Metric(a, b) + dm.Distances[a.ID][b.ID] = dist + dm.Distances[b.ID][a.ID] = dist + dm.nCalculations++ + return dist +} + +// calcAvgDistances returns a map that associates the ID of each provided +// Individual with the average distance the Individual has with the rest of the +// Individuals. +func calcAvgDistances(indis Individuals, dm DistanceMemoizer) map[string]float64 { + var avgDistances = make(map[string]float64) + for _, a := range indis { + for _, b := range indis { + avgDistances[a.ID] += dm.GetDistance(a, b) + } + avgDistances[a.ID] /= float64(len(indis) - 1) + } + return avgDistances +} + +func rebalanceClusters(clusters []Individuals, dm DistanceMemoizer, minPerCluster int) error { + // Calculate the number of missing Individuals per cluster for each cluster + // to reach at least minPerCluster Individuals. + var missing = make([]int, len(clusters)) + for i, cluster := range clusters { + // Check that the cluster has at least one Individual + if len(cluster) == 0 { + return fmt.Errorf("Cluster %d has 0 individuals", i) + } + // Calculate the number of missing Individual in the cluster to reach minPerCluster + missing[i] = minPerCluster - len(cluster) + } + // Check if there are enough Individuals to rebalance the clusters. + if sumInts(missing) >= 0 { + return fmt.Errorf("Missing %d individuals to be able to rebalance the clusters", + sumInts(missing)) + } + // Loop through the clusters that are missing Individuals + for i, cluster := range clusters { + // Check if the cluster is missing Individuals + if missing[i] <= 0 { + continue + } + // Assign new Individuals to the cluster while it is missing some + for missing[i] > 0 { + // Determine the medoid + cluster.SortByDistanceToMedoid(dm) + var medoid = cluster[0] + // Go through the Individuals of the other clusters and find the one + // closest to the computed medoid + var ( + cci int // Closest cluster index + cii int // Closest Individual index + minDist = math.Inf(1) + ) + for j := range clusters { + // Check that the cluster has Individuals to spare + if i == j || missing[j] >= 0 { + continue + } + // Find the closest Individual to the medoid inside the cluster + for k, indi := range clusters[j] { + var dist = dm.GetDistance(indi, medoid) + if dist < minDist { + cci = j + cii = k + minDist = dist + } + } + } + // Add the closest Individual to the cluster + clusters[i] = append(clusters[i], clusters[cci][cii]) + // Remove the closest Individual from the cluster it belonged to + clusters[cci] = append(clusters[cci][:cii], clusters[cci][cii+1:]...) + missing[i]-- + } + } + return nil +} diff --git a/distance_test.go b/distance_test.go new file mode 100644 index 0000000..ecf3541 --- /dev/null +++ b/distance_test.go @@ -0,0 +1,190 @@ +package gago + +import ( + "errors" + "fmt" + "testing" +) + +func TestDistanceMemoizer(t *testing.T) { + var ( + dm = newDistanceMemoizer(l1Distance) + a = Individual{Genome: Vector{1, 1, 1}, ID: "1"} + b = Individual{Genome: Vector{3, 3, 3}, ID: "2"} + c = Individual{Genome: Vector{6, 6, 6}, ID: "3"} + ) + // Check the number of calculations is initially 0 + if dm.nCalculations != 0 { + t.Error("nCalculations is not initialized to 0") + } + // Check the distance between the 1st and itself + if dm.GetDistance(a, a) != 0 { + t.Error("Wrong calculated distance") + } + // Check the number of calculations is initially 0 + if dm.nCalculations != 0 { + t.Error("nCalculations should not have increased") + } + // Check the distance between the 1st and the 2nd individual + if dm.GetDistance(a, b) != 6 { + t.Error("Wrong calculated distance") + } + // Check the number of calculations has increased by 1 + if dm.nCalculations != 1 { + t.Error("nCalculations has not increased") + } + // Check the distance between the 2nd and the 1st individual + if dm.GetDistance(b, a) != 6 { + t.Error("Wrong calculated distance") + } + // Check the number of calculations has not increased + if dm.nCalculations != 1 { + t.Error("nCalculations has increased") + } + // Check the distance between the 1st and the 3rd individual + if dm.GetDistance(a, c) != 15 { + t.Error("Wrong calculated distance") + } + // Check the distance between the 1st and the 3rd individual + if dm.GetDistance(b, c) != 9 { + t.Error("Wrong calculated distance") + } +} + +func TestSortByDistanceToMedoid(t *testing.T) { + var ( + indis = Individuals{ + Individual{Genome: Vector{3, 3, 3}, Fitness: 0}, + Individual{Genome: Vector{2, 2, 2}, Fitness: 1}, + Individual{Genome: Vector{5, 5, 5}, Fitness: 2}, + } + dm = newDistanceMemoizer(l1Distance) + ) + indis.SortByDistanceToMedoid(dm) + for i := range indis { + if indis[i].Fitness != float64(i) { + t.Error("Individuals were not sorted according to their distance to the medoid") + } + } +} + +func TestRebalanceClusters(t *testing.T) { + var testCases = []struct { + clusters []Individuals + dm DistanceMemoizer + minClusterSize int + newClusterSizes []int + err error + }{ + { + clusters: []Individuals{ + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "1"}, + Individual{Genome: Vector{1, 1, 1}, ID: "2"}, + Individual{Genome: Vector{1, 1, 1}, ID: "3"}, + Individual{Genome: Vector{2, 2, 2}, ID: "4"}, // Second furthest away from the cluster + Individual{Genome: Vector{3, 3, 3}, ID: "5"}, // Furthest away from the cluster + }, + Individuals{ + Individual{Genome: Vector{2, 2, 2}, ID: "6"}, + }, + Individuals{ + Individual{Genome: Vector{3, 3, 3}, ID: "7"}, + }, + }, + dm: newDistanceMemoizer(l1Distance), + minClusterSize: 2, + newClusterSizes: []int{3, 2, 2}, + err: nil, + }, + { + clusters: []Individuals{ + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "1"}, + Individual{Genome: Vector{1, 1, 1}, ID: "2"}, + }, + Individuals{}, + }, + dm: newDistanceMemoizer(l1Distance), + minClusterSize: 1, + newClusterSizes: []int{2, 0}, + err: errors.New("Cluster number 2 is empty"), + }, + { + clusters: []Individuals{ + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "1"}, + Individual{Genome: Vector{1, 1, 1}, ID: "2"}, + }, + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "3"}, + }, + }, + dm: newDistanceMemoizer(l1Distance), + minClusterSize: 2, + newClusterSizes: []int{2, 0}, + err: errors.New("Not enough individuals to rebalance"), + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var err = rebalanceClusters(tc.clusters, tc.dm, tc.minClusterSize) + // Check if the error is nil or not + if (err == nil) != (tc.err == nil) { + t.Errorf("Wrong error in test case number %d", i) + } + // Check new cluster sizes + if err == nil { + for j, cluster := range tc.clusters { + if len(cluster) != tc.newClusterSizes[j] { + t.Errorf("Wrong new cluster size in test case number %d", i) + } + } + } + }) + } +} + +// If a cluster is empty then rebalancing is impossible +func TestRebalanceClustersEmptyCluster(t *testing.T) { + var ( + clusters = []Individuals{ + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "1"}, + Individual{Genome: Vector{1, 1, 1}, ID: "2"}, + Individual{Genome: Vector{1, 1, 1}, ID: "3"}, + }, + Individuals{}, + } + dm = newDistanceMemoizer(l1Distance) + ) + var err = rebalanceClusters(clusters, dm, 2) + if err == nil { + t.Error("rebalanceClusters should have returned an error") + } +} + +// It's impossible to put 2 Individuals inside each cluster if there are 3 +// clusters and 5 individuals in total +func TestRebalanceClustersTooManyMissing(t *testing.T) { + var ( + clusters = []Individuals{ + Individuals{ + Individual{Genome: Vector{1, 1, 1}, ID: "1"}, + Individual{Genome: Vector{1, 1, 1}, ID: "2"}, + Individual{Genome: Vector{1, 1, 1}, ID: "3"}, + }, + Individuals{ + Individual{Genome: Vector{2, 2, 2}, ID: "6"}, + }, + Individuals{ + Individual{Genome: Vector{3, 3, 3}, ID: "7"}, + }, + } + dm = newDistanceMemoizer(l1Distance) + ) + var err = rebalanceClusters(clusters, dm, 2) + if err == nil { + t.Error("rebalanceClusters should have returned an error") + } +} diff --git a/ga.go b/ga.go new file mode 100644 index 0000000..84f6b92 --- /dev/null +++ b/ga.go @@ -0,0 +1,235 @@ +package gago + +import ( + "errors" + "log" + "math" + "math/rand" + "sort" + "time" +) + +// A GA contains population which themselves contain individuals. +type GA struct { + // Required fields + NewGenome NewGenome `json:"-"` + NPops int `json:"-"` // Number of Populations + PopSize int `json:"-"` // Number of Individuls per Population + Model Model `json:"-"` + + // Optional fields + NBest int `json:"-"` // Length of HallOfFame + Migrator Migrator `json:"-"` + MigFrequency int `json:"-"` // Frequency at which migrations occur + Speciator Speciator `json:"-"` + Logger *log.Logger `json:"-"` + Callback func(ga *GA) `json:"-"` + RNG *rand.Rand `json:"-"` + ParallelEval bool `json:"-"` + + // Fields generated at runtime + Populations Populations `json:"populations"` + HallOfFame Individuals `json:"hall_of_fame"` // Sorted best Individuals ever encountered + Age time.Duration `json:"duration"` // Duration during which the GA has been evolved + Generations int `json:"generations"` // Number of generations the GA has been evolved +} + +// Validate the parameters of a GA to ensure it will run correctly; some +// settings or combination of settings may be incoherent during runtime. +func (ga GA) Validate() error { + // Check the NewGenome presence + if ga.NewGenome == nil { + return errors.New("NewGenome cannot be nil") + } + // Check the number of populations is higher than 0 + if ga.NPops < 1 { + return errors.New("NPops should be higher than 0") + } + // Check the number of individuals per population is higher than 0 + if ga.PopSize < 1 { + return errors.New("PopSize should be higher than 0") + } + // Check the model presence + if ga.Model == nil { + return errors.New("Model cannot be nil") + } + // Check the model is valid + var modelErr = ga.Model.Validate() + if modelErr != nil { + return modelErr + } + // Check the migration frequency if a Migrator has been provided + if ga.Migrator != nil && ga.MigFrequency < 1 { + return errors.New("MigFrequency should be strictly higher than 0") + } + // Check the speciator is valid if it has been provided + if ga.Speciator != nil { + if specErr := ga.Speciator.Validate(); specErr != nil { + return specErr + } + } + // No error + return nil +} + +// Find the best current Individual in each population and then compare the best +// overall Individual to the current best Individual. The Individuals in each +// population are expected to be sorted. +func updateHallOfFame(hof Individuals, indis Individuals) { + var k = len(hof) + // Start by finding the current best Individual + for _, indi := range indis[:min(k, len(indis))] { + // Find if and where the Individual should fit in the hall of fame + var ( + f = func(i int) bool { return indi.Fitness < hof[i].Fitness } + i = sort.Search(k, f) + ) + if i < k { + // Shift the hall of fame to the right + copy(hof[i+1:], hof[i:]) + // Insert the new Individual + hof[i] = indi + } + } +} + +// Initialized indicates if the GA has been initialized or not. +func (ga GA) Initialized() bool { + if len(ga.Populations) != ga.NPops { + return false + } + return true +} + +// Initialize each population in the GA and assign an initial fitness to each +// individual in each population. Running Initialize after running Evolve will +// reset the GA entirely. +func (ga *GA) Initialize() { + // Check the NBest field + if ga.NBest < 1 { + ga.NBest = 1 + } + // Initialize the random number generator if it hasn't been set + if ga.RNG == nil { + ga.RNG = rand.New(rand.NewSource(time.Now().UnixNano())) + } + ga.Populations = make([]Population, ga.NPops) + for i := range ga.Populations { + // Generate a Population + ga.Populations[i] = newPopulation(ga.PopSize, ga.NewGenome, ga.RNG) + // Evaluate its Individuals + ga.Populations[i].Individuals.Evaluate(ga.ParallelEval) + // Sort it's Individuals + ga.Populations[i].Individuals.SortByFitness() + // Log current statistics if a logger has been provided + if ga.Logger != nil { + ga.Populations[i].Log(ga.Logger) + } + } + // Initialize HallOfFame + ga.HallOfFame = make(Individuals, ga.NBest) + for i := range ga.HallOfFame { + ga.HallOfFame[i] = Individual{Fitness: math.Inf(1)} + } + for _, pop := range ga.Populations { + updateHallOfFame(ga.HallOfFame, pop.Individuals) + } + // Execute the callback if it has been set + if ga.Callback != nil { + ga.Callback(ga) + } +} + +// Evolve each population in the GA. The population level operations are done +// in parallel with a wait group. After all the population operations have been +// run, the GA level operations are run. +func (ga *GA) Evolve() error { + var start = time.Now() + ga.Generations++ + // Check the GA has been initialized + if !ga.Initialized() { + return errors.New("The GA has not been initialized") + } + // Migrate the individuals between the populations if there are at least 2 + // Populations and that there is a migrator and that the migration frequency + // divides the generation count + if len(ga.Populations) > 1 && ga.Migrator != nil && ga.Generations%ga.MigFrequency == 0 { + ga.Migrator.Apply(ga.Populations, ga.RNG) + } + + var f = func(pop *Population) error { + var err error + // Apply speciation if a positive number of species has been specified + if ga.Speciator != nil { + err = pop.speciateEvolveMerge(ga.Speciator, ga.Model) + if err != nil { + return err + } + } else { + // Else apply the evolution model to the entire population + err = ga.Model.Apply(pop) + if err != nil { + return err + } + } + // Evaluate and sort + pop.Individuals.Evaluate(ga.ParallelEval) + pop.Individuals.SortByFitness() + pop.Age += time.Since(start) + pop.Generations++ + // Log current statistics if a logger has been provided + if ga.Logger != nil { + pop.Log(ga.Logger) + } + return err + } + + var err = ga.Populations.Apply(f, true) + if err != nil { + return err + } + + // Update HallOfFame + for _, pop := range ga.Populations { + updateHallOfFame(ga.HallOfFame, pop.Individuals) + } + // Execute the callback if it has been set + if ga.Callback != nil { + ga.Callback(ga) + } + ga.Age += time.Since(start) + // No error + return nil +} + +func (pop *Population) speciateEvolveMerge(spec Speciator, model Model) error { + var ( + species, err = spec.Apply(pop.Individuals, pop.RNG) + pops = make([]Population, len(species)) + ) + if err != nil { + return err + } + // Create a subpopulation from each specie so that the evolution Model can + // be applied to it. + for i, specie := range species { + pops[i] = Population{ + Individuals: specie, + Age: pop.Age, + Generations: pop.Generations, + ID: randString(len(pop.ID), pop.RNG), + RNG: pop.RNG, + } + err = model.Apply(&pops[i]) + if err != nil { + return err + } + } + // Merge each species back into the original population + var i int + for _, subpop := range pops { + copy(pop.Individuals[i:i+len(subpop.Individuals)], subpop.Individuals) + i += len(subpop.Individuals) + } + return nil +} diff --git a/ga_test.go b/ga_test.go new file mode 100644 index 0000000..fe62c34 --- /dev/null +++ b/ga_test.go @@ -0,0 +1,364 @@ +package gago + +import ( + "errors" + "fmt" + "math" + "math/rand" + "testing" + "time" +) + +func TestInitialized(t *testing.T) { + var ga = GA{ + NewGenome: NewVector, + NPops: 1, + PopSize: 10, + } + if ga.Initialized() { + t.Error("GA should not yet be initialized") + } + ga.Initialize() + if !ga.Initialized() { + t.Error("GA should be initialized") + } +} + +func TestValidationSuccess(t *testing.T) { + var err = ga.Validate() + if err != nil { + t.Error("GA parameters are invalid") + } +} + +func TestValidationNewGenome(t *testing.T) { + var genomeFactory = ga.NewGenome + ga.NewGenome = nil + if ga.Validate() == nil { + t.Error("Nil NewGenome should return an error") + } + ga.NewGenome = genomeFactory +} + +func TestValidationNPopulations(t *testing.T) { + var nPops = ga.NPops + ga.NPops = -1 + if ga.Validate() == nil { + t.Error("Invalid number of Populations should return an error") + } + ga.NPops = nPops +} + +func TestValidationNIndividuals(t *testing.T) { + var popSize = ga.PopSize + ga.PopSize = -1 + if ga.Validate() == nil { + t.Error("Invalid number of Individuals should return an error") + } + ga.PopSize = popSize +} + +func TestValidationModel(t *testing.T) { + var model = ga.Model + // Check nil model raises error + ga.Model = nil + if ga.Validate() == nil { + t.Error("Nil Model should return an error") + } + // Check invalid model raises error + ga.Model = ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: -1, + } + if ga.Validate() == nil { + t.Error("Invalid Model should return an error") + } + ga.Model = model +} + +func TestValidationMigFrequency(t *testing.T) { + var ( + migrator = ga.Migrator + migFrequency = ga.MigFrequency + ) + ga.Migrator = MigRing{} + ga.MigFrequency = 0 + if ga.Validate() == nil { + t.Error("Invalid MigFrequency should return an error") + } + ga.Migrator = migrator + ga.MigFrequency = migFrequency +} + +func TestValidationSpeciator(t *testing.T) { + var speciator = ga.Speciator + ga.Speciator = SpecFitnessInterval{0} + if ga.Validate() == nil { + t.Error("Invalid Speciator should return an error") + } + ga.Speciator = speciator +} + +func TestApplyWithSpeciator(t *testing.T) { + var speciator = ga.Speciator + ga.Speciator = SpecFitnessInterval{4} + if ga.Evolve() != nil { + t.Error("Calling Apply with a valid Speciator should not return an error") + } + ga.Speciator = speciator +} + +func TestRandomNumberGenerators(t *testing.T) { + for i, pop1 := range ga.Populations { + for j, pop2 := range ga.Populations { + if i != j && &pop1.RNG == &pop2.RNG { + t.Error("Population should not share random number generators") + } + } + } +} + +func TestBest(t *testing.T) { + for _, pop := range ga.Populations { + for _, indi := range pop.Individuals { + if ga.HallOfFame[0].Fitness > indi.Fitness { + t.Error("The current best individual is not the overall best") + } + } + } +} + +func TestUpdateHallOfFame(t *testing.T) { + var ( + testCases = []struct { + hofIn Individuals + indis Individuals + hofOut Individuals + }{ + { + hofIn: Individuals{ + Individual{Fitness: math.Inf(1)}, + }, + indis: Individuals{ + Individual{Fitness: 0}, + }, + hofOut: Individuals{ + Individual{Fitness: 0}, + }, + }, + { + hofIn: Individuals{ + Individual{Fitness: 0}, + Individual{Fitness: math.Inf(1)}, + }, + indis: Individuals{ + Individual{Fitness: 1}, + }, + hofOut: Individuals{ + Individual{Fitness: 0}, + Individual{Fitness: 1}, + }, + }, + } + ) + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + updateHallOfFame(tc.hofIn, tc.indis) + // Compare the obtained hall of fame to the expected one) + for i, indi := range tc.hofIn { + if indi.Fitness != tc.hofOut[i].Fitness { + t.Errorf("Expected %v, got %v", tc.hofOut[i], indi) + } + } + }) + } +} + +// TestDuration verifies the sum of the duration of each population is higher +// the actual duration. This is due to the fact that each population runs on a +// separate core. +func TestDuration(t *testing.T) { + var totalDuration time.Duration + for _, pop := range ga.Populations { + totalDuration += pop.Age + } + if totalDuration < ga.Age { + t.Error("Inefficient parallelism") + } +} + +func TestSpeciateEvolveMerge(t *testing.T) { + var ( + rng = newRand() + testCases = []struct { + pop Population + speciator Speciator + model Model + err error + }{ + { + pop: Population{ + ID: "42", + RNG: rng, + Individuals: Individuals{ + Individual{Fitness: 0}, + Individual{Fitness: 1}, + Individual{Fitness: 2}, + Individual{Fitness: 3}, + Individual{Fitness: 4}, + }, + }, + speciator: SpecFitnessInterval{3}, + model: ModIdentity{}, + err: nil, + }, + { + pop: Population{ + ID: "42", + RNG: rng, + Individuals: Individuals{ + Individual{Fitness: 0}, + Individual{Fitness: 1}, + Individual{Fitness: 2}, + }, + }, + speciator: SpecFitnessInterval{4}, + model: ModIdentity{}, + err: errors.New("Invalid speciator"), + }, + { + pop: Population{ + ID: "42", + RNG: rng, + Individuals: Individuals{ + Individual{Fitness: 0}, + Individual{Fitness: 1}, + Individual{Fitness: 2}, + Individual{Fitness: 3}, + Individual{Fitness: 4}, + }, + }, + speciator: SpecFitnessInterval{3}, + model: ModGenerational{ + Selector: SelTournament{6}, + MutRate: 0.5, + }, + err: errors.New("Invalid model"), + }, + } + ) + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var err = tc.pop.speciateEvolveMerge(tc.speciator, tc.model) + if (err == nil) != (tc.err == nil) { + t.Errorf("Wrong error in test case number %d", i) + } + // If there is no error check the individuals are ordered as they were + // at they were initially + if err == nil { + for j, indi := range tc.pop.Individuals { + if indi.Fitness != float64(j) { + t.Errorf("Wrong result in test case number %d", i) + } + } + } + }) + } +} + +func TestCallback(t *testing.T) { + var ( + counter int + incrementCounter = func(ga *GA) { + counter++ + } + ) + ga.Callback = incrementCounter + ga.Initialize() + if counter != 1 { + t.Error("Counter was not incremented by the callback at initialization") + } + ga.Evolve() + if counter != 2 { + t.Error("Counter was not incremented by the callback at enhancement") + } +} + +func TestGAEvolveModelRuntimeError(t *testing.T) { + var model = ga.Model + ga.Model = ModRuntimeError{} + // Check invalid model doesn't raise error + if ga.Validate() != nil { + t.Errorf("Expected nil, got %s", ga.Validate()) + } + // Evolve + var err = ga.Evolve() + if err == nil { + t.Error("An error should have been raised") + } + ga.Model = model +} + +func TestGAEvolveSpeciatorRuntimeError(t *testing.T) { + var speciator = ga.Speciator + ga.Speciator = SpecRuntimeError{} + // Check invalid speciator doesn't raise error + if ga.Validate() != nil { + t.Errorf("Expected nil, got %s", ga.Validate()) + } + // Evolve + var err = ga.Evolve() + if err == nil { + t.Error("An error should have been raised") + } + ga.Speciator = speciator +} + +func TestGAConsistentResults(t *testing.T) { + var ( + ga1 = GA{ + NewGenome: NewVector, + NPops: 2, + PopSize: 10, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + RNG: rand.New(rand.NewSource(42)), + } + ga2 = GA{ + NewGenome: NewVector, + NPops: 2, + PopSize: 10, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + RNG: rand.New(rand.NewSource(42)), + } + ) + + // Run the first GA + ga1.Initialize() + for i := 0; i < 20; i++ { + ga1.Evolve() + } + + // Run the second GA + ga2.Initialize() + for i := 0; i < 20; i++ { + ga2.Evolve() + } + + // Compare best individuals + if ga1.HallOfFame[0].Fitness != ga2.HallOfFame[0].Fitness { + t.Errorf("Expected %f, got %f", ga1.HallOfFame[0].Fitness, ga2.HallOfFame[0].Fitness) + } + +} diff --git a/genome.go b/genome.go new file mode 100644 index 0000000..65a6594 --- /dev/null +++ b/genome.go @@ -0,0 +1,16 @@ +package gago + +import "math/rand" + +// A Genome is an object that can have any number and kinds of properties. As +// long as it can be evaluated, mutated and crossedover then it can evolved. +type Genome interface { + Evaluate() float64 + Mutate(rng *rand.Rand) + Crossover(genome Genome, rng *rand.Rand) + Clone() Genome +} + +// A NewGenome is a method that generates a new Genome with random +// properties. +type NewGenome func(rng *rand.Rand) Genome diff --git a/individual.go b/individual.go new file mode 100644 index 0000000..ec9b3ff --- /dev/null +++ b/individual.go @@ -0,0 +1,87 @@ +package gago + +import ( + "fmt" + "math" + "math/rand" +) + +// An Individual wraps a Genome and contains the fitness assigned to the Genome. +type Individual struct { + Genome Genome `json:"genome"` + Fitness float64 `json:"fitness"` + Evaluated bool `json:"-"` + ID string `json:"id"` +} + +// NewIndividual returns a fresh individual. +func NewIndividual(genome Genome, rng *rand.Rand) Individual { + return Individual{ + Genome: genome, + Fitness: math.Inf(1), + Evaluated: false, + ID: randString(6, rng), + } +} + +// String representation of an Individual. A tick (✔) or cross (✘) marker is +// added at the end to indicate if the Individual has been evaluated or not. +func (indi Individual) String() string { + if indi.Evaluated { + return fmt.Sprintf("%s - %.3f - %v", indi.ID, indi.Fitness, indi.Genome) + } + return fmt.Sprintf("%s - ??? - %v", indi.ID, indi.Genome) +} + +// Clone an individual to produce a new individual with a different pointer and +// a different ID. +func (indi Individual) Clone(rng *rand.Rand) Individual { + return Individual{ + Genome: indi.Genome.Clone(), + Fitness: indi.Fitness, + Evaluated: indi.Evaluated, + ID: randString(6, rng), + } +} + +// Evaluate the fitness of an individual. Don't evaluate individuals that have +// already been evaluated. +func (indi *Individual) Evaluate() { + if !indi.Evaluated { + indi.Fitness = indi.Genome.Evaluate() + indi.Evaluated = true + } +} + +// GetFitness returns the fitness of an Individual after making sure it has been +// evaluated. +func (indi *Individual) GetFitness() float64 { + indi.Evaluate() + return indi.Fitness +} + +// Mutate an individual by calling the Mutate method of it's Genome. +func (indi *Individual) Mutate(rng *rand.Rand) { + indi.Genome.Mutate(rng) + indi.Evaluated = false +} + +// Crossover an individual by calling the Crossover method of it's Genome. +func (indi *Individual) Crossover(mate Individual, rng *rand.Rand) { + indi.Genome.Crossover(mate.Genome, rng) + indi.Evaluated = false + mate.Evaluated = false +} + +// IdxOfClosest returns the index of the closest individual from a slice of +// individuals based on the Metric field of a DistanceMemoizer. +func (indi Individual) IdxOfClosest(indis Individuals, dm DistanceMemoizer) (i int) { + var min = math.Inf(1) + for j, candidate := range indis { + var dist = dm.GetDistance(indi, candidate) + if dist < min { + min, i = dist, j + } + } + return i +} diff --git a/individual_test.go b/individual_test.go new file mode 100644 index 0000000..9b2826f --- /dev/null +++ b/individual_test.go @@ -0,0 +1,96 @@ +package gago + +import ( + "fmt" + "testing" +) + +func TestIndividualString(t *testing.T) { + var testCases = []struct { + indi Individual + str string + }{ + { + indi: Individual{ + Genome: Vector{0, 1, 2}, + Fitness: 42, + Evaluated: true, + ID: "bob", + }, + str: "bob - 42.000 - [0 1 2]", + }, + { + indi: Individual{ + Genome: Vector{0, 1, 2}, + Fitness: 42, + Evaluated: false, + ID: "ALICE", + }, + str: "ALICE - ??? - [0 1 2]", + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indi.String() != tc.str { + t.Errorf("Expected %s, got %s", tc.str, tc.indi.String()) + } + }) + } +} + +func TestCloneIndividual(t *testing.T) { + var ( + rng = newRand() + genome = NewVector(rng) + indi1 = NewIndividual(genome, rng) + indi2 = indi1.Clone(rng) + ) + if &indi1 == &indi2 || &indi1.Genome == &indi2.Genome { + t.Error("Individual was not deep copied") + } +} + +func TestEvaluateIndividual(t *testing.T) { + var ( + rng = newRand() + genome = NewVector(rng) + indi = NewIndividual(genome, rng) + ) + if indi.Evaluated { + t.Error("Individual shouldn't have Evaluated set to True") + } + indi.Evaluate() + if !indi.Evaluated { + t.Error("Individual should have Evaluated set to True") + } +} + +func TestMutateIndividual(t *testing.T) { + var ( + rng = newRand() + genome = NewVector(rng) + indi = NewIndividual(genome, rng) + ) + indi.Evaluate() + indi.Mutate(rng) + if indi.Evaluated { + t.Error("Individual shouldn't have Evaluated set to True") + } +} + +func TestCrossoverIndividual(t *testing.T) { + var ( + rng = newRand() + indi1 = NewIndividual(NewVector(rng), rng) + indi2 = NewIndividual(NewVector(rng), rng) + offspring1 = indi1.Clone(rng) + offspring2 = indi2.Clone(rng) + ) + indi1.Crossover(indi2, rng) + if offspring1.Evaluated || offspring2.Evaluated { + t.Error("Offsprings shouldn't have Evaluated set to True") + } + if &offspring1 == &indi1 || &offspring1 == &indi2 || &offspring2 == &indi1 || &offspring2 == &indi2 { + t.Error("Offsprings shouldn't share pointers with parents") + } +} diff --git a/individuals.go b/individuals.go new file mode 100644 index 0000000..9b1fa44 --- /dev/null +++ b/individuals.go @@ -0,0 +1,148 @@ +package gago + +import ( + "math" + "math/rand" + "sort" + "strings" + + "golang.org/x/sync/errgroup" +) + +// Individuals is a convenience type, methods that belong to an Individual can +// be called declaratively. +type Individuals []Individual + +// String representation of a slice of Individuals. +func (indis Individuals) String() string { + var str string + for _, indi := range indis { + str += indi.String() + "\n" + } + return strings.TrimSuffix(str, "\n") +} + +// Clone returns the same exact same slice of individuals but with different +// pointers and ID fields. +func (indis Individuals) Clone(rng *rand.Rand) Individuals { + var clones = make(Individuals, len(indis)) + for i, indi := range indis { + clones[i] = indi.Clone(rng) + } + return clones +} + +// Generate a slice of n new individuals. +func newIndividuals(n int, newGenome NewGenome, rng *rand.Rand) Individuals { + var indis = make(Individuals, n) + for i := range indis { + indis[i] = NewIndividual(newGenome(rng), rng) + } + return indis +} + +// Apply a function to a slice of Individuals. +func (indis Individuals) Apply(f func(indi *Individual) error, parallel bool) error { + if parallel { + var g errgroup.Group + for i := range indis { + i := i // https://golang.org/doc/faq#closures_and_goroutines + g.Go(func() error { + return f(&indis[i]) + }) + } + return g.Wait() + } + var err error + for i := range indis { + err = f(&indis[i]) + if err != nil { + return err + } + } + return err +} + +// Evaluate each Individual. If parallel is true then each Individual will be +// evaluated in parallel thanks to the golang.org/x/sync/errgroup package. If +// not then a simple sequential loop will be used. Evaluating in parallel is +// only recommended for cases where evaluating an Individual takes a "long" +// time. Indeed there won't necessarily be a speed-up when evaluating in +// parallel. In fact performance can be degraded if evaluating an Individual is +// too cheap. +func (indis Individuals) Evaluate(parallel bool) { + indis.Apply( + func(indi *Individual) error { indi.Evaluate(); return nil }, + parallel, + ) +} + +// Mutate each individual. +func (indis Individuals) Mutate(mutRate float64, rng *rand.Rand) { + for i := range indis { + if rng.Float64() < mutRate { + indis[i].Mutate(rng) + } + } +} + +// SortByFitness ascendingly sorts individuals by fitness. +func (indis Individuals) SortByFitness() { + var less = func(i, j int) bool { return indis[i].Fitness < indis[j].Fitness } + sort.Slice(indis, less) +} + +// IsSortedByFitness checks if individuals are ascendingly sorted by fitness. +func (indis Individuals) IsSortedByFitness() bool { + var less = func(i, j int) bool { return indis[i].Fitness < indis[j].Fitness } + return sort.SliceIsSorted(indis, less) +} + +// SortByDistanceToMedoid sorts Individuals according to their distance to the +// medoid. The medoid is the Individual that has the lowest average distance to +// the rest of the Individuals. +func (indis Individuals) SortByDistanceToMedoid(dm DistanceMemoizer) { + var ( + avgDists = calcAvgDistances(indis, dm) + less = func(i, j int) bool { + return avgDists[indis[i].ID] < avgDists[indis[j].ID] + } + ) + sort.Slice(indis, less) +} + +// Extract the fitness of a slice of individuals into a float64 slice. +func (indis Individuals) getFitnesses() []float64 { + var fitnesses = make([]float64, len(indis)) + for i, indi := range indis { + fitnesses[i] = indi.Fitness + } + return fitnesses +} + +// FitMin returns the best fitness of a slice of individuals. +func (indis Individuals) FitMin() float64 { + if indis.IsSortedByFitness() { + return indis[0].Fitness + } + return minFloat64s(indis.getFitnesses()) +} + +// FitMax returns the best fitness of a slice of individuals. +func (indis Individuals) FitMax() float64 { + if indis.IsSortedByFitness() { + return indis[len(indis)-1].Fitness + } + return maxFloat64s(indis.getFitnesses()) +} + +// FitAvg returns the average fitness of a slice of individuals. +func (indis Individuals) FitAvg() float64 { + return meanFloat64s(indis.getFitnesses()) +} + +// FitStd returns the standard deviation of the fitness of a slice of +// individuals. +func (indis Individuals) FitStd() float64 { + return math.Sqrt(varianceFloat64s(indis.getFitnesses())) +} diff --git a/individuals_test.go b/individuals_test.go new file mode 100644 index 0000000..a90c308 --- /dev/null +++ b/individuals_test.go @@ -0,0 +1,244 @@ +package gago + +import ( + "fmt" + "math" + "testing" +) + +func TestIndividualsString(t *testing.T) { + var testCases = []struct { + indis Individuals + str string + }{ + { + indis: Individuals{ + Individual{ + Genome: Vector{0, 1, 2}, + Fitness: 42, + Evaluated: true, + ID: "bob", + }, + Individual{ + Genome: Vector{0, 1, 2}, + Fitness: 42, + Evaluated: false, + ID: "ALICE", + }, + }, + str: "bob - 42.000 - [0 1 2]\nALICE - ??? - [0 1 2]", + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indis.String() != tc.str { + t.Errorf("Expected %s, got %s", tc.str, tc.indis.String()) + } + }) + } +} + +func TestNewIndividuals(t *testing.T) { + var rng = newRand() + for _, n := range []int{1, 2, 42} { + var indis = newIndividuals(n, NewVector, rng) + if len(indis) != n { + t.Error("newIndividuals didn't generate the right number of individuals") + } + } +} + +func TestCloneIndividuals(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(20, NewVector, rng) + clones = indis.Clone(rng) + ) + for _, indi := range indis { + for _, clone := range clones { + if &indi == &clone || indi.ID == clone.ID { + t.Error("Cloning did not work as expected") + } + } + } +} + +func TestEvaluateIndividuals(t *testing.T) { + var indis = newIndividuals(10, NewVector, newRand()) + for _, indi := range indis { + if indi.Evaluated { + t.Error("Individual shouldn't have Evaluated set to True") + } + } + indis.Evaluate(false) + for _, indi := range indis { + if !indi.Evaluated { + t.Error("Individual should have Evaluated set to True") + } + } +} + +func TestEvaluateIndividualsParallel(t *testing.T) { + var indis = newIndividuals(10, NewVector, newRand()) + for _, indi := range indis { + if indi.Evaluated { + t.Error("Individual shouldn't have Evaluated set to True") + } + } + indis.Evaluate(true) + for _, indi := range indis { + if !indi.Evaluated { + t.Error("Individual should have Evaluated set to True") + } + } +} + +func TestMutateIndividuals(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(10, NewVector, rng) + ) + indis.Evaluate(false) + indis.Mutate(1, rng) + for _, indi := range indis { + if indi.Evaluated { + t.Error("Individual shouldn't have Evaluated set to True") + } + } +} + +func TestIndividualsSortByFitness(t *testing.T) { + var indis = newIndividuals(10, NewVector, newRand()) + // Assign a fitness to each individual in decreasing order + for i := range indis { + indis[i].Fitness = float64(len(indis) - i) + } + indis.SortByFitness() + // Check fitnesses are in increasing order + for i := 1; i < len(indis); i++ { + if indis[i-1].Fitness > indis[i].Fitness { + t.Error("Individuals are not sorted") + } + } +} + +func TestGetFitnesses(t *testing.T) { + var ( + indis = Individuals{ + Individual{Fitness: 0.0}, + Individual{Fitness: 1.0}, + Individual{Fitness: 2.0}, + } + target = []float64{0.0, 1.0, 2.0} + fitnesses = indis.getFitnesses() + ) + for i, fitness := range fitnesses { + if fitness != target[i] { + t.Error("getFitnesses didn't work as expected") + } + } +} + +func TestFitMin(t *testing.T) { + var testCases = []struct { + indis Individuals + min float64 + }{ + {Individuals{ + Individual{Fitness: 1.0}, + }, 1.0}, + {Individuals{ + Individual{Fitness: 2.0}, + Individual{Fitness: 1.0}, + }, 1.0}, + {Individuals{ + Individual{Fitness: 1.0}, + Individual{Fitness: -1.0}, + }, -1.0}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indis.FitMin() != tc.min { + t.Error("FitMin didn't work as expected") + } + }) + } +} + +func TestFitMax(t *testing.T) { + var testCases = []struct { + indis Individuals + max float64 + }{ + {Individuals{ + Individual{Fitness: 1.0}, + }, 1.0}, + {Individuals{ + Individual{Fitness: 2.0}, + Individual{Fitness: 1.0}, + }, 2.0}, + {Individuals{ + Individual{Fitness: 1.0}, + Individual{Fitness: -1.0}, + }, 1.0}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indis.FitMax() != tc.max { + t.Error("FitMax didn't work as expected") + } + }) + } +} + +func TestFitAvg(t *testing.T) { + var testCases = []struct { + indis Individuals + mean float64 + }{ + {Individuals{ + Individual{Fitness: 1.0}, + }, 1.0}, + {Individuals{ + Individual{Fitness: 1.0}, + Individual{Fitness: 2.0}, + }, 1.5}, + {Individuals{ + Individual{Fitness: -1.0}, + Individual{Fitness: 1.0}, + }, 0.0}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indis.FitAvg() != tc.mean { + t.Error("FitAvg didn't work as expected") + } + }) + } +} + +func TestFitStd(t *testing.T) { + var testCases = []struct { + indis Individuals + variance float64 + }{ + {Individuals{ + Individual{Fitness: 1.0}, + }, 0.0}, + {Individuals{ + Individual{Fitness: -1.0}, + Individual{Fitness: 1.0}, + }, 1.0}, + {Individuals{ + Individual{Fitness: -2.0}, + Individual{Fitness: 2.0}, + }, 4.0}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + if tc.indis.FitStd() != math.Sqrt(tc.variance) { + t.Error("FitStd didn't work as expected") + } + }) + } +} diff --git a/initialization.go b/initialization.go new file mode 100644 index 0000000..2d3559e --- /dev/null +++ b/initialization.go @@ -0,0 +1,52 @@ +package gago + +import "math/rand" + +// InitUnifFloat64 generates random float64s x such that lower < x < upper. +func InitUnifFloat64(n int, lower, upper float64, rng *rand.Rand) (floats []float64) { + floats = make([]float64, n) + for i := range floats { + floats[i] = lower + rng.Float64()*(upper-lower) + } + return +} + +// InitJaggFloat64 generates random float64s x such that lower < x < upper with jagged bounds +func InitJaggFloat64(n int, lower, upper []float64, rng *rand.Rand) (floats []float64) { + floats = make([]float64, n) + for i := range floats { + floats[i] = lower[i] + rng.Float64()*(upper[i]-lower[i]) + } + return +} + +// InitNormFloat64 generates random float64s sampled from a normal distribution. +func InitNormFloat64(n int, mean, std float64, rng *rand.Rand) (floats []float64) { + floats = make([]float64, n) + for i := range floats { + floats[i] = rng.NormFloat64()*std + mean + } + return +} + +// InitUnifString generates random strings based on a given corpus. The strings +// are not necessarily distinct. +func InitUnifString(n int, corpus []string, rng *rand.Rand) (strings []string) { + strings = make([]string, n) + for i := range strings { + strings[i] = corpus[rng.Intn(len(corpus))] + } + return +} + +// InitUniqueString generates random string slices based on a given corpus, each +// element from the corpus is only represented once in each slice. The method +// starts by shuffling, it then assigns the elements of the corpus in increasing +// index order to an individual. +func InitUniqueString(n int, corpus []string, rng *rand.Rand) (strings []string) { + strings = make([]string, n) + for i, v := range randomInts(n, 0, len(corpus), rng) { + strings[i] = corpus[v] + } + return +} diff --git a/initialization_test.go b/initialization_test.go new file mode 100644 index 0000000..52c2319 --- /dev/null +++ b/initialization_test.go @@ -0,0 +1,156 @@ +package gago + +import ( + "fmt" + "strings" + "testing" +) + +func TestInitUnifFloat64(t *testing.T) { + var ( + testCases = []struct { + n int + bounds struct{ lower, upper float64 } + }{ + { + n: 0, + bounds: struct{ lower, upper float64 }{-1, 0}, + }, + { + n: 1, + bounds: struct{ lower, upper float64 }{-1, 0}, + }, + { + n: 2, + bounds: struct{ lower, upper float64 }{-1, 0}, + }, + { + n: 42, + bounds: struct{ lower, upper float64 }{-1, 0}, + }, + { + n: 3, + bounds: struct{ lower, upper float64 }{0, 1}, + }, + { + n: 3, + bounds: struct{ lower, upper float64 }{-1, 1}, + }, + } + rng = newRand() + ) + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var vector = InitUnifFloat64(tc.n, tc.bounds.lower, tc.bounds.upper, rng) + // Check length + if len(vector) != tc.n { + t.Error("InitUnifFloat64 didn't produce the right number of values") + } + // Check values are bounded + for _, v := range vector { + if v <= tc.bounds.lower || v >= tc.bounds.upper { + t.Error("InitUnifFloat64 produced out of bound values") + } + } + }) + } +} + +func TestInitJaggFloat64(t *testing.T) { + var ( + N = []int{0, 1, 2, 42} + rng = newRand() + ) + for _, n := range N { + var ( + lower = make([]float64, n) + upper = make([]float64, n) + ) + + for i := 0; i < n; i++ { + lower[i] = 0.0 + rng.Float64()*100.0 + upper[i] = lower[i] + rng.Float64()*100.0 + } + + var vector = InitJaggFloat64(n, lower, upper, rng) + // Check length + if len(vector) != n { + t.Error("InitJaggFloat64 didn't produce the right number of values") + } + // Check values are bounded + for i, v := range vector { + if v <= lower[i] || v >= upper[i] { + t.Error("InitJaggFloat64 produced out of bound values") + } + } + } +} + +func TestInitNormFloat64(t *testing.T) { + var rng = newRand() + for _, n := range []int{0, 1, 2, 42} { + if len(InitNormFloat64(n, 0, 1, rng)) != n { + t.Error("InitNormFloat64 didn't produce the right number of values") + } + } +} + +func TestInitUnifString(t *testing.T) { + var ( + rng = newRand() + corpus = strings.Split("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "") + ) + for _, n := range []int{0, 1, 2, 42} { + var genome = InitUnifString(n, corpus, rng) + if len(genome) != n { + t.Error("InitUnifString didn't produce the right number of values") + } + // Check the values are part of the corpus + for _, v := range genome { + var partOfCorpus = false + for _, c := range corpus { + if v == c { + partOfCorpus = true + break + } + } + if !partOfCorpus { + t.Error("InitUnifString produced a value out of the corpus") + } + } + } +} + +func TestInitUniqueString(t *testing.T) { + var ( + rng = newRand() + corpus = strings.Split("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "") + ) + for _, n := range []int{0, 1, 2, 42} { + var genome = InitUniqueString(n, corpus, rng) + if len(genome) != n { + t.Error("InitUniqueString didn't produce the right number of values") + } + // Check the values are part of the corpus + for _, v := range genome { + var partOfCorpus = false + for _, c := range corpus { + if v == c { + partOfCorpus = true + break + } + } + if !partOfCorpus { + t.Error("InitUniqueString produced a value out of the corpus") + } + } + // Check the values are unique + for i, v1 := range genome { + for j, v2 := range genome { + if i != j && v1 == v2 { + t.Error("InitUniqueString didn't produce unique values") + } + } + } + } +} diff --git a/migration.go b/migration.go new file mode 100644 index 0000000..a603ac0 --- /dev/null +++ b/migration.go @@ -0,0 +1,39 @@ +package gago + +import ( + "errors" + "math/rand" +) + +// Migrator applies crossover to the GA level, as such it doesn't +// require an independent random number generator and can use the global one. +type Migrator interface { + Apply(pops Populations, rng *rand.Rand) + Validate() error +} + +// MigRing migration exchanges individuals between consecutive Populations in a +// random fashion. One by one, each population exchanges NMigrants individuals +// at random with the next population. NMigrants should be higher than the +// number of individuals in each population, else all the individuals will +// migrate and it will be as if nothing happened. +type MigRing struct { + NMigrants int // Number of migrants per exchange between Populations +} + +// Apply MigRing. +func (mig MigRing) Apply(pops Populations, rng *rand.Rand) { + for i := 0; i < len(pops)-1; i++ { + for _, k := range randomInts(mig.NMigrants, 0, len(pops[i].Individuals), rng) { + pops[i].Individuals[k], pops[i+1].Individuals[k] = pops[i+1].Individuals[k], pops[i].Individuals[k] + } + } +} + +// Validate MigRing fields. +func (mig MigRing) Validate() error { + if mig.NMigrants < 1 { + return errors.New("NMigrants should be higher than 0") + } + return nil +} diff --git a/migration_test.go b/migration_test.go new file mode 100644 index 0000000..0a97cc7 --- /dev/null +++ b/migration_test.go @@ -0,0 +1,53 @@ +package gago + +import "testing" + +func TestMigSizes(t *testing.T) { + var ( + rng = newRand() + migrators = []Migrator{ + MigRing{ + NMigrants: 5, + }, + } + ) + for _, migrator := range migrators { + for _, nbrPops := range []int{2, 3, 10} { + var fitnessMeans = make([]float64, nbrPops) + for _, nbrIndis := range []int{6, 10, 30} { + // Instantiate populations + var pops = make([]Population, nbrPops) + for i := range pops { + pops[i] = newPopulation(nbrIndis, NewVector, rng) + pops[i].Individuals.Evaluate(false) + fitnessMeans[i] = pops[i].Individuals.FitAvg() + } + migrator.Apply(pops, rng) + // Check the Population sizes haven't changed + for _, pop := range pops { + if len(pop.Individuals) != nbrIndis { + t.Error("Migration changed the Population sizes") + } + } + // Check the average fitnesses have changed + for i, pop := range pops { + if pop.Individuals.FitAvg() == fitnessMeans[i] { + t.Error("Average fitnesses didn't change") + } + } + } + } + } +} + +func TestMigRingValidate(t *testing.T) { + var mig = MigRing{1} + if err := mig.Validate(); err != nil { + t.Error("Validation should not have raised error") + } + // Set NMigrants lower than 1 + mig.NMigrants = 0 + if err := mig.Validate(); err == nil { + t.Error("Validation should raised error") + } +} diff --git a/models.go b/models.go new file mode 100644 index 0000000..89e627a --- /dev/null +++ b/models.go @@ -0,0 +1,368 @@ +package gago + +import ( + "errors" + "math" + "math/rand" +) + +var ( + errNilSelector = errors.New("Selector cannot be nil") + errInvalidMutRate = errors.New("MutRate should be between 0 and 1") +) + +// Two parents are selected from a pool of individuals, crossover is then +// applied to generate two offsprings. The selection and crossover process is +// repeated until n offsprings have been generated. If n is uneven then the +// second offspring of the last crossover is discarded. +func generateOffsprings(n int, indis Individuals, sel Selector, rng *rand.Rand) (Individuals, error) { + var ( + offsprings = make(Individuals, n) + i = 0 + ) + for i < len(offsprings) { + // Select 2 parents + var selected, _, err = sel.Apply(2, indis, rng) + if err != nil { + return nil, err + } + // Generate 2 offsprings from the parents + selected[0].Crossover(selected[1], rng) + if i < len(offsprings) { + offsprings[i] = selected[0] + i++ + } + if i < len(offsprings) { + offsprings[i] = selected[1] + i++ + } + } + return offsprings, nil +} + +// A Model specifies a protocol for applying genetic operators to a +// population at generation i in order for it obtain better individuals at +// generation i+1. +type Model interface { + Apply(pop *Population) error + Validate() error +} + +// ModGenerational implements the generational model. +type ModGenerational struct { + Selector Selector + MutRate float64 +} + +// Apply ModGenerational. +func (mod ModGenerational) Apply(pop *Population) error { + // Generate as many offsprings as there are of individuals in the current population + var offsprings, err = generateOffsprings( + len(pop.Individuals), + pop.Individuals, + mod.Selector, + pop.RNG, + ) + if err != nil { + return err + } + // Apply mutation to the offsprings + if mod.MutRate > 0 { + offsprings.Mutate(mod.MutRate, pop.RNG) + } + // Replace the old population with the new one + copy(pop.Individuals, offsprings) + return nil +} + +// Validate ModGenerational fields. +func (mod ModGenerational) Validate() error { + // Check the selection method presence + if mod.Selector == nil { + return errNilSelector + } + // Check the selection method parameters + var errSelector = mod.Selector.Validate() + if errSelector != nil { + return errSelector + } + // Check the mutation rate + if mod.MutRate < 0 || mod.MutRate > 1 { + return errInvalidMutRate + } + return nil +} + +// ModSteadyState implements the steady state model. +type ModSteadyState struct { + Selector Selector + KeepBest bool + MutRate float64 +} + +// Apply ModSteadyState. +func (mod ModSteadyState) Apply(pop *Population) error { + var selected, indexes, err = mod.Selector.Apply(2, pop.Individuals, pop.RNG) + if err != nil { + return err + } + var offsprings = selected.Clone(pop.RNG) + offsprings[0].Crossover(offsprings[1], pop.RNG) + // Apply mutation to the offsprings + if mod.MutRate > 0 { + if pop.RNG.Float64() < mod.MutRate { + offsprings[0].Mutate(pop.RNG) + } + if pop.RNG.Float64() < mod.MutRate { + offsprings[1].Mutate(pop.RNG) + } + } + if mod.KeepBest { + // Replace the chosen individuals with the best individuals + offsprings[0].Evaluate() + offsprings[1].Evaluate() + var indis = Individuals{selected[0], selected[1], offsprings[0], offsprings[1]} + indis.SortByFitness() + pop.Individuals[indexes[0]] = indis[0] + pop.Individuals[indexes[1]] = indis[1] + } else { + // Replace the chosen parents with the offsprings + pop.Individuals[indexes[0]] = offsprings[0] + pop.Individuals[indexes[1]] = offsprings[1] + } + return nil +} + +// Validate ModSteadyState fields. +func (mod ModSteadyState) Validate() error { + // Check the selection method presence + if mod.Selector == nil { + return errNilSelector + } + // Check the selection method parameters + var errSelector = mod.Selector.Validate() + if errSelector != nil { + return errSelector + } + // Check the mutation rate in the presence of a mutator + if mod.MutRate < 0 || mod.MutRate > 1 { + return errInvalidMutRate + } + return nil +} + +// ModDownToSize implements the select down to size model. +type ModDownToSize struct { + NOffsprings int + SelectorA Selector + SelectorB Selector + MutRate float64 +} + +// Apply ModDownToSize. +func (mod ModDownToSize) Apply(pop *Population) error { + var offsprings, err = generateOffsprings( + mod.NOffsprings, + pop.Individuals, + mod.SelectorA, + pop.RNG, + ) + if err != nil { + return err + } + // Apply mutation to the offsprings + if mod.MutRate > 0 { + offsprings.Mutate(mod.MutRate, pop.RNG) + } + offsprings.Evaluate(false) + // Merge the current population with the offsprings + offsprings = append(offsprings, pop.Individuals...) + // Select down to size + var selected, _, _ = mod.SelectorB.Apply(len(pop.Individuals), offsprings, pop.RNG) + // Replace the current population of individuals + copy(pop.Individuals, selected) + return nil +} + +// Validate ModDownToSize fields. +func (mod ModDownToSize) Validate() error { + // Check the number of offsprings value + if mod.NOffsprings <= 0 { + return errors.New("NOffsprings has to higher than 0") + } + // Check the first selection method presence + if mod.SelectorA == nil { + return errNilSelector + } + // Check the first selection method parameters + var errSelectorA = mod.SelectorA.Validate() + if errSelectorA != nil { + return errSelectorA + } + // Check the second selection method presence + if mod.SelectorB == nil { + return errNilSelector + } + // Check the second selection method parameters + var errSelectorB = mod.SelectorB.Validate() + if errSelectorB != nil { + return errSelectorB + } + // Check the mutation rate in the presence of a mutator + if mod.MutRate < 0 || mod.MutRate > 1 { + return errInvalidMutRate + } + return nil +} + +// ModRing implements the island ring model. +type ModRing struct { + Selector Selector + MutRate float64 +} + +// Apply ModRing. +func (mod ModRing) Apply(pop *Population) error { + for i := range pop.Individuals { + var ( + indi = pop.Individuals[i].Clone(pop.RNG) + neighbour = pop.Individuals[i%len(pop.Individuals)] + ) + indi.Crossover(neighbour, pop.RNG) + // Apply mutation to the offsprings + if mod.MutRate > 0 { + if pop.RNG.Float64() < mod.MutRate { + indi.Mutate(pop.RNG) + } + if pop.RNG.Float64() < mod.MutRate { + neighbour.Mutate(pop.RNG) + } + } + indi.Evaluate() + neighbour.Evaluate() + // Select an individual out of the original individual and the + // offsprings + var ( + indis = Individuals{pop.Individuals[i], indi, neighbour} + selected, _, err = mod.Selector.Apply(1, indis, pop.RNG) + ) + if err != nil { + return err + } + pop.Individuals[i] = selected[0] + } + return nil +} + +// Validate ModRing fields. +func (mod ModRing) Validate() error { + // Check the selection method presence + if mod.Selector == nil { + return errNilSelector + } + // Check the selection method parameters + var errSelector = mod.Selector.Validate() + if errSelector != nil { + return errSelector + } + // Check the mutation rate in the presence of a mutator + if mod.MutRate < 0 || mod.MutRate > 1 { + return errInvalidMutRate + } + return nil +} + +// ModSimAnn implements simulated annealing. Enhancing a GA with the ModSimAnn +// model only has to be done once for the simulated annealing to do a complete +// run. Successive enhancements will simply reset the temperature and run the +// simulated annealing again (which can potentially stagnate). +type ModSimAnn struct { + T float64 // Starting temperature + Tmin float64 // Stopping temperature + Alpha float64 // Decrease rate per iteration +} + +// Apply ModSimAnn. +func (mod ModSimAnn) Apply(pop *Population) error { + // Continue until having reached the minimum temperature + for mod.T > mod.Tmin { + for i, indi := range pop.Individuals { + // Generate a random neighbour through mutation + var neighbour = indi.Clone(pop.RNG) + neighbour.Mutate(pop.RNG) + neighbour.Evaluate() + if neighbour.Fitness < indi.Fitness { + pop.Individuals[i] = neighbour + } else { + var p = math.Exp((indi.Fitness - neighbour.Fitness) / mod.T) + if p > pop.RNG.Float64() { + pop.Individuals[i] = neighbour + } + } + } + mod.T *= mod.Alpha // Reduce the temperature + } + return nil +} + +// Validate ModSimAnn fields. +func (mod ModSimAnn) Validate() error { + // Check the stopping temperature value + if mod.Tmin < 0 { + return errors.New("Tmin should be higher than 0") + } + // Check the starting temperature value + if mod.T < mod.Tmin { + return errors.New("T should be a number higher than Tmin") + } + // Check the decrease rate value + if mod.Alpha <= 0 || mod.Alpha >= 1 { + return errInvalidMutRate + } + return nil +} + +// ModMutationOnly implements the mutation only model. Each generation, +// NChosen are selected and are replaced with mutants. Mutants are obtained by +// mutating the selected Individuals. If Strict is set to true, then the mutants +// replace the chosen individuals only if they have a lower fitness. +type ModMutationOnly struct { + NChosen int // Number of individuals that are mutated each generation + Selector Selector + Strict bool +} + +// Apply ModMutationOnly. +func (mod ModMutationOnly) Apply(pop *Population) error { + var selected, positions, err = mod.Selector.Apply(mod.NChosen, pop.Individuals, pop.RNG) + if err != nil { + return err + } + for i, indi := range selected { + var mutant = indi.Clone(pop.RNG) + mutant.Mutate(pop.RNG) + mutant.Evaluate() + if !mod.Strict || (mod.Strict && mutant.Fitness > indi.Fitness) { + pop.Individuals[positions[i]] = mutant + } + } + return nil +} + +// Validate ModMutationOnly fields. +func (mod ModMutationOnly) Validate() error { + // Check the number of chosen individuals value + if mod.NChosen < 1 { + return errors.New("NChosen should be higher than 0") + } + // Check the selector presence + if mod.Selector == nil { + return errNilSelector + } + // Check the selection method parameters + var errSelector = mod.Selector.Validate() + if errSelector != nil { + return errSelector + } + return nil +} diff --git a/models_test.go b/models_test.go new file mode 100644 index 0000000..af83695 --- /dev/null +++ b/models_test.go @@ -0,0 +1,206 @@ +package gago + +import "testing" + +var ( + // Valid models + validModels = []Model{ + ModGenerational{ + Selector: SelTournament{1}, + MutRate: 0.2, + }, + ModSteadyState{ + Selector: SelTournament{1}, + KeepBest: false, + MutRate: 0.2, + }, + ModSteadyState{ + Selector: SelTournament{1}, + KeepBest: true, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: SelTournament{1}, + SelectorB: SelElitism{}, + MutRate: 0.2, + }, + ModRing{ + Selector: SelTournament{1}, + MutRate: 0.2, + }, + ModSimAnn{ + T: 10, + Tmin: 1, + Alpha: 0.3, + }, + ModMutationOnly{ + NChosen: 3, + Selector: SelTournament{1}, + Strict: false, + }, + ModMutationOnly{ + NChosen: 3, + Selector: SelTournament{1}, + Strict: true, + }, + } + // Invalid models + invalidModels = []Model{ + ModGenerational{ + Selector: nil, + MutRate: 0.2, + }, + ModGenerational{ + Selector: SelTournament{0}, + MutRate: 0.2, + }, + ModGenerational{ + Selector: SelTournament{1}, + MutRate: -1, + }, + ModSteadyState{ + Selector: nil, + KeepBest: false, + MutRate: 0.2, + }, + ModSteadyState{ + Selector: SelTournament{0}, + KeepBest: true, + MutRate: 0.2, + }, + ModSteadyState{ + Selector: SelTournament{1}, + KeepBest: true, + MutRate: -1, + }, + ModDownToSize{ + NOffsprings: -1, + SelectorA: SelTournament{1}, + SelectorB: SelElitism{}, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: nil, + SelectorB: SelElitism{}, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: SelTournament{0}, + SelectorB: SelElitism{}, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: SelTournament{1}, + SelectorB: nil, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: SelTournament{1}, + SelectorB: SelTournament{0}, + MutRate: 0.2, + }, + ModDownToSize{ + NOffsprings: 5, + SelectorA: SelTournament{1}, + SelectorB: SelElitism{}, + MutRate: -1, + }, + ModRing{ + Selector: nil, + MutRate: 0.2, + }, + ModRing{ + Selector: SelTournament{0}, + MutRate: 0.2, + }, + ModRing{ + Selector: SelTournament{1}, + MutRate: -1, + }, + ModSimAnn{ + T: 1, + Tmin: 10, + Alpha: 0.3, + }, + ModSimAnn{ + T: 10, + Tmin: -1, + Alpha: 0.3, + }, + ModSimAnn{ + T: 10, + Tmin: 1, + Alpha: -1, + }, + ModMutationOnly{ + NChosen: -1, + Selector: SelTournament{1}, + Strict: false, + }, + ModMutationOnly{ + NChosen: 3, + Selector: nil, + Strict: false, + }, + ModMutationOnly{ + NChosen: 3, + Selector: SelTournament{0}, + Strict: false, + }, + } +) + +// TestGenerateOffsprings checks that GenerateOffsprings works as intended by +// producing the desired number of offsprings. +func TestGenerateOffsprings(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(20, NewVector, rng) + ) + for _, n := range []int{0, 1, 3, 10} { + var offsprings, _ = generateOffsprings(n, indis, SelTournament{1}, rng) + if len(offsprings) != n { + t.Error("GenerateOffsprings didn't produce the expected number of offsprings") + } + } +} + +// TestModelsValidate checks that each model's Validate method doesn't return +// an error in case of a valid model and vice-versa +func TestModelsValidate(t *testing.T) { + // Check valid models do not raise an error + for _, model := range validModels { + if model.Validate() != nil { + t.Error("The model validation should not have raised an error") + } + } + // Check invalid models raise an error + for _, model := range invalidModels { + if model.Validate() == nil { + t.Error("The model validation should have raised error") + } + } +} + +// TestModelsConstantSize checks that each model doesn't change the size of a +// population when applied. +func TestModelsConstantSize(t *testing.T) { + var rng = newRand() + for _, n := range []int{1, 2, 3, 42} { + for _, model := range validModels { + var pop = newPopulation(n, NewVector, rng) + // Check the size of the population doesn't change for a few iterations + for i := 0; i < 5; i++ { + model.Apply(&pop) + if len(pop.Individuals) != n { + t.Error("A model application changed the population size") + } + } + } + } +} diff --git a/mutation.go b/mutation.go new file mode 100644 index 0000000..fc8a240 --- /dev/null +++ b/mutation.go @@ -0,0 +1,87 @@ +package gago + +import ( + "math/rand" +) + +// Type specific mutations for slices + +// MutNormalFloat64 modifies a float64 gene if a coin toss is under a defined +// mutation rate. The new gene value is a random value sampled from a normal +// distribution centered on the gene's current value and with a standard +// deviation proportional to the current value. It does so for each gene. +func MutNormalFloat64(genome []float64, rate float64, rng *rand.Rand) { + for i := range genome { + // Flip a coin and decide to mutate or not + if rng.Float64() < rate { + genome[i] += rng.NormFloat64() * genome[i] + } + } +} + +// MutUniformString picks a gene at random and replaces it with a random from a +// provided corpus. It repeats this n times. +func MutUniformString(genome []string, corpus []string, n int, rng *rand.Rand) { + for i := 0; i < n; i++ { + var ( + element = corpus[rng.Intn(len(corpus))] + pos = rng.Intn(len(genome)) + ) + genome[pos] = element + } +} + +// Generic mutations for slices + +// MutPermute permutes two genes at random n times. +func MutPermute(genome Slice, n int, rng *rand.Rand) { + // Nothing to permute + if genome.Len() <= 1 { + return + } + for i := 0; i < n; i++ { + // Choose two points on the genome + var points = randomInts(2, 0, genome.Len(), rng) + genome.Swap(points[0], points[1]) + } +} + +// MutPermuteInt calls MutPermute on an int slice. +func MutPermuteInt(s []int, n int, rng *rand.Rand) { + MutPermute(IntSlice(s), n, rng) +} + +// MutPermuteFloat64 calls MutPermute on a float64 slice. +func MutPermuteFloat64(s []float64, n int, rng *rand.Rand) { + MutPermute(Float64Slice(s), n, rng) +} + +// MutPermuteString callsMutPermute on a string slice. +func MutPermuteString(s []string, n int, rng *rand.Rand) { + MutPermute(StringSlice(s), n, rng) +} + +// MutSplice splits a genome in 2 and glues the pieces back together in reverse +// order. +func MutSplice(genome Slice, rng *rand.Rand) { + var ( + k = rng.Intn(genome.Len()-1) + 1 + a, b = genome.Split(k) + ) + genome.Replace(b.Append(a)) +} + +// MutSpliceInt calls MutSplice on an int slice. +func MutSpliceInt(s []int, rng *rand.Rand) { + MutSplice(IntSlice(s), rng) +} + +// MutSpliceFloat64 calls MutSplice on a float64 slice. +func MutSpliceFloat64(s []float64, rng *rand.Rand) { + MutSplice(Float64Slice(s), rng) +} + +// MutSpliceString calls MutSplice on a string slice. +func MutSpliceString(s []string, rng *rand.Rand) { + MutSplice(StringSlice(s), rng) +} diff --git a/mutation_test.go b/mutation_test.go new file mode 100644 index 0000000..d86d269 --- /dev/null +++ b/mutation_test.go @@ -0,0 +1,231 @@ +package gago + +import ( + "testing" +) + +func sliceContainsInt(x int, ints []int) bool { + for _, v := range ints { + if v == x { + return true + } + } + return false +} + +func sliceContainsFloat64(x float64, floats []float64) bool { + for _, v := range floats { + if v == x { + return true + } + } + return false +} + +func sliceContainsString(x string, strings []string) bool { + for _, v := range strings { + if v == x { + return true + } + } + return false +} + +func TestMutNormalFloat64All(t *testing.T) { + var ( + rng = newRand() + genome = []float64{1, 2, 3} + mutated = make([]float64, len(genome)) + ) + copy(mutated, genome) + MutNormalFloat64(mutated, 1, rng) + for i, v := range mutated { + if v == genome[i] { + t.Error("Gene should have been modified but hasn't") + } + } +} + +func TestMutNormalFloat64None(t *testing.T) { + var ( + rng = newRand() + genome = []float64{1, 2, 3} + mutated = make([]float64, len(genome)) + ) + copy(mutated, genome) + MutNormalFloat64(mutated, 0, rng) + for i, v := range mutated { + if v != genome[i] { + t.Error("Gene has been modified but shouldn't have") + } + } +} + +func TestMutUniformString(t *testing.T) { + var ( + rng = newRand() + genome = []string{"a", "b", "c"} + corpus = []string{"d", "e", "f"} + mutated = make([]string, len(genome)) + ) + copy(mutated, genome) + MutUniformString(mutated, corpus, 3, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the new genes are present in the previous genome or in the corpus + for _, v := range mutated { + var inGenome = false + for _, gene := range genome { + if gene == v { + inGenome = true + } + } + var inCorpus = false + for _, element := range corpus { + if element == v { + inCorpus = true + } + } + if !inGenome && !inCorpus { + t.Error("New genome is not present in previous genome or in corpus") + } + } +} + +func TestMutPermuteSingleGene(t *testing.T) { + var ( + rng = newRand() + genome = []int{42} + ) + MutPermuteInt(genome, 1, rng) + // Check the length of the mutated genome + if len(genome) != 1 { + t.Error("Mutated genome has the wrong length") + } + // Check the value of the mutated genome + for genome[0] != 42 { + t.Error("Mutated genome has the wrong value") + } +} + +func TestMutPermuteInt(t *testing.T) { + var ( + rng = newRand() + genome = []int{1, 2, 3} + mutated = make([]int, len(genome)) + ) + copy(mutated, genome) + MutPermuteInt(mutated, 3, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsInt(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} + +func TestMutPermuteFloat64(t *testing.T) { + var ( + rng = newRand() + genome = []float64{1, 2, 3} + mutated = make([]float64, len(genome)) + ) + copy(mutated, genome) + MutPermuteFloat64(mutated, 3, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsFloat64(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} + +func TestMutPermuteString(t *testing.T) { + var ( + rng = newRand() + genome = []string{"a", "b", "c"} + mutated = make([]string, len(genome)) + ) + copy(mutated, genome) + MutPermuteString(mutated, 3, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsString(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} + +func TestMutSpliceInt(t *testing.T) { + var ( + rng = newRand() + genome = []int{1, 2, 3} + mutated = make([]int, len(genome)) + ) + copy(mutated, genome) + MutSpliceInt(mutated, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsInt(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} + +func TestMutSpliceFloat64(t *testing.T) { + var ( + rng = newRand() + genome = []float64{1, 2, 3} + mutated = make([]float64, len(genome)) + ) + copy(mutated, genome) + MutSpliceFloat64(mutated, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsFloat64(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} + +func TestMutSpliceString(t *testing.T) { + var ( + rng = newRand() + genome = []string{"a", "b", "c"} + mutated = make([]string, len(genome)) + ) + copy(mutated, genome) + MutSpliceString(mutated, rng) + // Check the length of the mutated genome is consistent + if len(mutated) != len(genome) { + t.Error("Mutated genome has the wrong length") + } + // Check the genes in the initial genome are still present + for _, v := range genome { + if !sliceContainsString(v, mutated) { + t.Error("Gene in initial genome has disappeared") + } + } +} diff --git a/population.go b/population.go new file mode 100644 index 0000000..a71d1d6 --- /dev/null +++ b/population.go @@ -0,0 +1,70 @@ +package gago + +import ( + "log" + "math/rand" + "time" + + "golang.org/x/sync/errgroup" +) + +// A Population contains individuals. Individuals mate within a population. +// Individuals can migrate from one population to another. Each population has a +// random number generator to bypass the global rand mutex. +type Population struct { + Individuals Individuals `json:"indis"` + Age time.Duration `json:"age"` + Generations int `json:"generations"` + ID string `json:"id"` + RNG *rand.Rand +} + +// Generate a new population. +func newPopulation(size int, newGenome NewGenome, RNG *rand.Rand) Population { + var ( + popRNG = rand.New(rand.NewSource(RNG.Int63())) + pop = Population{ + Individuals: newIndividuals(size, newGenome, popRNG), + ID: randString(3, popRNG), + RNG: popRNG, + } + ) + return pop +} + +// Log a Population's current statistics with a provided log.Logger. +func (pop Population) Log(logger *log.Logger) { + logger.Printf( + "pop_id=%s min=%f max=%f avg=%f std=%f", + pop.ID, + pop.Individuals.FitMin(), + pop.Individuals.FitMax(), + pop.Individuals.FitAvg(), + pop.Individuals.FitStd(), + ) +} + +// Populations type is necessary for migration and speciation purposes. +type Populations []Population + +// Apply a function to a slice of Populations. +func (pops Populations) Apply(f func(pop *Population) error, parallel bool) error { + if parallel { + var g errgroup.Group + for i := range pops { + i := i // https://golang.org/doc/faq#closures_and_goroutines + g.Go(func() error { + return f(&pops[i]) + }) + } + return g.Wait() + } + var err error + for i := range pops { + err = f(&pops[i]) + if err != nil { + return err + } + } + return err +} diff --git a/presets.go b/presets.go new file mode 100644 index 0000000..96602d0 --- /dev/null +++ b/presets.go @@ -0,0 +1,46 @@ +package gago + +// Generational returns a GA instance that uses the generational model. +func Generational(NewGenome NewGenome) GA { + return GA{ + NewGenome: NewGenome, + NPops: 2, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + } +} + +// SimulatedAnnealing returns a GA instance that mimicks a basic simulated +// annealing procedure. +func SimulatedAnnealing(NewGenome NewGenome) GA { + return GA{ + NewGenome: NewGenome, + NPops: 1, + PopSize: 1, + Model: ModSimAnn{ + T: 100, // Starting temperature + Tmin: 1, // Stopping temperature + Alpha: 0.99, // Decrease rate per iteration + }, + } +} + +// HillClimbing returns a GA instance that mimicks a basic hill-climbing +// procedure. +func HillClimbing(NewGenome NewGenome) GA { + return GA{ + NewGenome: NewGenome, + NPops: 1, + PopSize: 1, + Model: ModMutationOnly{ + NChosen: 1, + Selector: SelElitism{}, + Strict: true, + }, + } +} diff --git a/presets_test.go b/presets_test.go new file mode 100644 index 0000000..a3137b4 --- /dev/null +++ b/presets_test.go @@ -0,0 +1,17 @@ +package gago + +import "testing" + +var presets = []GA{ + Generational(NewVector), + SimulatedAnnealing(NewVector), + HillClimbing(NewVector), +} + +func TestPresetsValid(t *testing.T) { + for _, preset := range presets { + if preset.Validate() != nil { + t.Error("The preset parameters are invalid") + } + } +} diff --git a/selection.go b/selection.go new file mode 100644 index 0000000..122ec38 --- /dev/null +++ b/selection.go @@ -0,0 +1,118 @@ +package gago + +import ( + "errors" + "fmt" + "math/rand" + "sort" +) + +// Selector chooses a subset of size n from a group of individuals. The group of +// individuals a Selector is applied to is expected to be sorted. +type Selector interface { + Apply(n int, indis Individuals, rng *rand.Rand) (selected Individuals, indexes []int, err error) + Validate() error +} + +// SelElitism selection returns the n best individuals of a group. +type SelElitism struct{} + +// Apply SelElitism. +func (sel SelElitism) Apply(n int, indis Individuals, rng *rand.Rand) (Individuals, []int, error) { + indis.SortByFitness() + return indis[:n].Clone(rng), newInts(n), nil +} + +// Validate SelElitism fields. +func (sel SelElitism) Validate() error { + return nil +} + +// SelTournament samples individuals through tournament selection. The +// tournament is composed of randomly chosen individuals. The winner of the +// tournament is the chosen individual with the lowest fitness. The obtained +// individuals are all distinct. +type SelTournament struct { + NContestants int +} + +// Apply SelTournament. +func (sel SelTournament) Apply(n int, indis Individuals, rng *rand.Rand) (Individuals, []int, error) { + // Check that the number of individuals is large enough + if len(indis)-n < sel.NContestants-1 { + return nil, nil, fmt.Errorf("Not enough individuals to select %d "+ + "with NContestants = %d, have %d individuals and need at least %d", + n, sel.NContestants, len(indis), sel.NContestants+n-1) + } + var ( + winners = make(Individuals, n) + indexes = make([]int, n) + notSelectedIdxs = newInts(len(indis)) + ) + for i := range winners { + // Sample contestants + var ( + contestants, idxs, _ = sampleInts(notSelectedIdxs, sel.NContestants, rng) + winnerIdx int + ) + // Find the best contestant + winners[i] = indis[contestants[0]] + winners[i].Evaluate() + for j, idx := range contestants[1:] { + if indis[idx].GetFitness() < winners[i].Fitness { + winners[i] = indis[idx] + indexes[i] = idx + winnerIdx = idxs[j] + } + } + // Ban the winner from re-participating + notSelectedIdxs = append(notSelectedIdxs[:winnerIdx], notSelectedIdxs[winnerIdx+1:]...) + } + return winners.Clone(rng), indexes, nil +} + +// Validate SelTournament fields. +func (sel SelTournament) Validate() error { + if sel.NContestants < 1 { + return errors.New("NContestants should be higher than 0") + } + return nil +} + +// SelRoulette samples individuals through roulette wheel selection (also known +// as fitness proportionate selection). +type SelRoulette struct{} + +func buildWheel(fitnesses []float64) []float64 { + var ( + n = len(fitnesses) + wheel = make([]float64, n) + ) + for i, v := range fitnesses { + wheel[i] = fitnesses[n-1] - v + 1 + } + return cumsum(divide(wheel, sumFloat64s(wheel))) +} + +// Apply SelRoulette. +func (sel SelRoulette) Apply(n int, indis Individuals, rng *rand.Rand) (Individuals, []int, error) { + var ( + selected = make(Individuals, n) + indexes = make([]int, n) + wheel = buildWheel(indis.getFitnesses()) + ) + for i := range selected { + var ( + index = sort.SearchFloat64s(wheel, rand.Float64()) + winner = indis[index] + ) + indexes[i] = index + selected[i] = winner + } + return selected.Clone(rng), indexes, nil +} + +// Validate SelRoulette fields. +func (sel SelRoulette) Validate() error { + return nil +} diff --git a/selection_test.go b/selection_test.go new file mode 100644 index 0000000..7776e54 --- /dev/null +++ b/selection_test.go @@ -0,0 +1,120 @@ +package gago + +import ( + "fmt" + "testing" +) + +var ( + validSelectors = []Selector{ + SelElitism{}, + SelTournament{3}, + SelRoulette{}, + } + invalidSelectors = []Selector{ + SelTournament{0}, + SelTournament{-1}, + } +) + +func TestSelectionSize(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(30, NewVector, rng) + selectors = []Selector{ + SelTournament{ + NContestants: 3, + }, + SelElitism{}, + } + ) + for _, selector := range selectors { + for _, n := range []int{3, 10, 20} { + var selected, _, _ = selector.Apply(n, indis, rng) + if len(selected) != n { + t.Error("Selector didn't select the expected number of individuals") + } + } + } +} + +func TestSelElitism(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(30, NewVector, rng) + selector = SelElitism{} + ) + indis.Evaluate(false) + for _, n := range []int{1, 2, 10, 30} { + var _, indexes, _ = selector.Apply(n, indis, rng) + for i, index := range indexes { + if index != i { + t.Error("SelElitism didn't select the expected individuals") + } + } + } +} + +func TestSelTournament(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(30, NewVector, rng) + ) + indis.Evaluate(false) + var selected, _, _ = SelTournament{len(indis)}.Apply(1, indis, rng) + if selected[0].Fitness != indis.FitMin() { + t.Error("Full SelTournament didn't select the best individual") + } +} + +func TestBuildWheel(t *testing.T) { + var testCases = []struct { + fitnesses []float64 + weights []float64 + }{ + {[]float64{-10, -8, -5}, []float64{6.0 / 11, 10.0 / 11, 1}}, + {[]float64{-2, 0, 2, 3}, []float64{6.0 / 13, 10.0 / 13, 12.0 / 13, 1}}, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var weights = buildWheel(tc.fitnesses) + for i := range weights { + if weights[i] != tc.weights[i] { + t.Error("buildWheel didn't work as expected") + } + } + }) + } +} + +func TestSelRoulette(t *testing.T) { + var ( + rng = newRand() + indis = newIndividuals(30, NewVector, rng) + sel = SelRoulette{} + ) + indis.Evaluate(false) + for _, n := range []int{0, 1, 10, 30} { + var selected, _, _ = sel.Apply(n, indis, rng) + if len(selected) != n { + t.Error("SelRoulette didn't select the right number of individuals") + } + } +} + +// TestSelectorsValidate checks that each selector's Validate method doesn't +// return an error in case of a valid model and that it does for invalid models. +func TestSelectorsValidate(t *testing.T) { + // Check valid selectors do not raise an error + for _, sel := range validSelectors { + if sel.Validate() != nil { + t.Error("The selector validation should not have raised an error") + } + } + // Check invalid selectors raise an error + for _, sel := range invalidSelectors { + if sel.Validate() == nil { + t.Error("The selector validation should have raised error") + } + } +} diff --git a/setup_test.go b/setup_test.go new file mode 100644 index 0000000..7d9251c --- /dev/null +++ b/setup_test.go @@ -0,0 +1,103 @@ +package gago + +import ( + "errors" + "log" + "math" + "math/rand" + "os" + "time" +) + +var ( + ga = GA{ + NewGenome: NewVector, + NPops: 2, + PopSize: 50, + Model: ModGenerational{ + Selector: SelTournament{ + NContestants: 3, + }, + MutRate: 0.5, + }, + Migrator: MigRing{10}, + MigFrequency: 3, + Logger: log.New(os.Stdin, "", log.Ldate|log.Ltime), + } + nbrGenerations = 5 // Initial number of generations to enhance +) + +// newRand returns a new random number generator with a random seed. +func newRand() *rand.Rand { + return rand.New(rand.NewSource(time.Now().UnixNano())) +} + +func init() { + ga.Initialize() + for i := 0; i < nbrGenerations; i++ { + ga.Evolve() + } +} + +type Vector []float64 + +// Implement the Genome interface + +func (X Vector) Evaluate() float64 { + var sum float64 + for _, x := range X { + sum += x + } + return sum +} + +func (X Vector) Mutate(rng *rand.Rand) { + MutNormalFloat64(X, 0.5, rng) +} + +func (X Vector) Crossover(Y Genome, rng *rand.Rand) { + CrossUniformFloat64(X, Y.(Vector), rng) +} + +func (X Vector) Clone() Genome { + var XX = make(Vector, len(X)) + copy(XX, X) + return XX +} + +func NewVector(rng *rand.Rand) Genome { + return Vector(InitUnifFloat64(4, -10, 10, rng)) +} + +// Minkowski distance with p = 1 +func l1Distance(x1, x2 Individual) (dist float64) { + var g1 = x1.Genome.(Vector) + var g2 = x2.Genome.(Vector) + for i := range g1 { + dist += math.Abs(g1[i] - g2[i]) + } + return +} + +// Identity model + +type ModIdentity struct{} + +func (mod ModIdentity) Apply(pop *Population) error { return nil } +func (mod ModIdentity) Validate() error { return nil } + +// Runtime error model + +type ModRuntimeError struct{} + +func (mod ModRuntimeError) Apply(pop *Population) error { return errors.New("") } +func (mod ModRuntimeError) Validate() error { return nil } + +// Runtime error speciator + +type SpecRuntimeError struct{} + +func (spec SpecRuntimeError) Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) { + return []Individuals{indis, indis}, errors.New("") +} +func (spec SpecRuntimeError) Validate() error { return nil } diff --git a/slice.go b/slice.go new file mode 100644 index 0000000..67f7fce --- /dev/null +++ b/slice.go @@ -0,0 +1,227 @@ +package gago + +import "errors" + +// A Slice is a genome with a list-like structure. +type Slice interface { + At(i int) interface{} + Set(i int, v interface{}) + Len() int + Swap(i, j int) + Slice(a, b int) Slice + Split(k int) (Slice, Slice) + Append(Slice) Slice + Replace(Slice) + Copy() Slice +} + +// Search for the first index of an element in a Slice. +func search(v interface{}, s Slice) (int, error) { + for i := 0; i < s.Len(); i++ { + if s.At(i) == v { + return i, nil + } + } + // Element not in slice + return 0, errors.New("Value not contained in slice") +} + +// Make a lookup table from a slice, mapping values to indexes. +func newIndexLookup(s Slice) map[interface{}]int { + var lookup = make(map[interface{}]int) + for i := 0; i < s.Len(); i++ { + lookup[s.At(i)] = i + } + return lookup +} + +// getCycles determines the cycles that exist between two slices. A cycle is a +// list of indexes indicating mirroring values between each slice. +func getCycles(s1, s2 Slice) (cycles [][]int) { + var ( + s1Lookup = newIndexLookup(s1) // Matches values to indexes for quick lookup + visited = make(map[int]bool) // Indicates if an index is already in a cycle or not + ) + for i := 0; i < s1.Len(); i++ { + if !visited[i] { + visited[i] = true + var ( + cycle = []int{i} + j = s1Lookup[s2.At(i)] + ) + // Continue building the cycle until it closes in on itself + for j != cycle[0] { + cycle = append(cycle, j) + visited[j] = true + j = s1Lookup[s2.At(j)] + } + cycles = append(cycles, cycle) + } + } + return +} + +// getNeighbours converts a slice into an adjacency map mapping values to left +// and right neighbours. The values of the map are sets. +func getNeighbours(s Slice) map[interface{}]set { + var ( + neighbours = make(map[interface{}]set) + n = s.Len() + ) + neighbours[s.At(0)] = set{s.At(n - 1): true, s.At(1): true} + for i := 1; i < n-1; i++ { + neighbours[s.At(i)] = set{s.At(i - 1): true, s.At(i + 1): true} + } + neighbours[s.At(n-1)] = set{s.At(n - 2): true, s.At(0): true} + return neighbours +} + +// IntSlice attaches the methods of Slice to []float64 +type IntSlice []int + +// At method from Slice +func (s IntSlice) At(i int) interface{} { + return s[i] +} + +// Set method from Slice +func (s IntSlice) Set(i int, v interface{}) { + s[i] = v.(int) +} + +// Len method from Slice +func (s IntSlice) Len() int { + return len(s) +} + +// Swap method from Slice +func (s IntSlice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// Slice method from Slice +func (s IntSlice) Slice(a, b int) Slice { + return s[a:b] +} + +// Split method from Slice +func (s IntSlice) Split(k int) (Slice, Slice) { + return s[:k], s[k:] +} + +// Append method from Slice +func (s IntSlice) Append(t Slice) Slice { + return append(s, t.(IntSlice)...) +} + +// Replace method from Slice +func (s IntSlice) Replace(t Slice) { + copy(s, t.(IntSlice)) +} + +// Copy method from Slice +func (s IntSlice) Copy() Slice { + var t = make(IntSlice, len(s)) + copy(t, s) + return t +} + +// Float64Slice attaches the methods of Slice to []float64 +type Float64Slice []float64 + +// At method from Slice +func (s Float64Slice) At(i int) interface{} { + return s[i] +} + +// Set method from Slice +func (s Float64Slice) Set(i int, v interface{}) { + s[i] = v.(float64) +} + +// Len method from Slice +func (s Float64Slice) Len() int { + return len(s) +} + +// Swap method from Slice +func (s Float64Slice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// Slice method from Slice +func (s Float64Slice) Slice(a, b int) Slice { + return s[a:b] +} + +// Split method from Slice +func (s Float64Slice) Split(k int) (Slice, Slice) { + return s[:k], s[k:] +} + +// Append method from Slice +func (s Float64Slice) Append(t Slice) Slice { + return append(s, t.(Float64Slice)...) +} + +// Replace method from Slice +func (s Float64Slice) Replace(t Slice) { + copy(s, t.(Float64Slice)) +} + +// Copy method from Slice +func (s Float64Slice) Copy() Slice { + var t = make(Float64Slice, len(s)) + copy(t, s) + return t +} + +// StringSlice attaches the methods of Slice to []float64 +type StringSlice []string + +// At method from Slice +func (s StringSlice) At(i int) interface{} { + return s[i] +} + +// Set method from Slice +func (s StringSlice) Set(i int, v interface{}) { + s[i] = v.(string) +} + +// Len method from Slice +func (s StringSlice) Len() int { + return len(s) +} + +// Swap method from Slice +func (s StringSlice) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} + +// Slice method from Slice +func (s StringSlice) Slice(a, b int) Slice { + return s[a:b] +} + +// Split method from Slice +func (s StringSlice) Split(k int) (Slice, Slice) { + return s[:k], s[k:] +} + +// Append method from Slice +func (s StringSlice) Append(t Slice) Slice { + return append(s, t.(StringSlice)...) +} + +// Replace method from Slice +func (s StringSlice) Replace(t Slice) { + copy(s, t.(StringSlice)) +} + +// Copy method from Slice +func (s StringSlice) Copy() Slice { + var t = make(StringSlice, len(s)) + copy(t, s) + return t +} diff --git a/slice_test.go b/slice_test.go new file mode 100644 index 0000000..9a58c77 --- /dev/null +++ b/slice_test.go @@ -0,0 +1,301 @@ +package gago + +import ( + "fmt" + "testing" +) + +func TestSearch(t *testing.T) { + var s = StringSlice{"イ", "ー", "ス", "タ", "ー"} + if i, err := search("イ", s); i != 0 || err != nil { + t.Error("Problem with search 1") + } + if i, err := search("ー", s); i != 1 || err != nil { + t.Error("Problem with search 2") + } + if i, err := search("ス", s); i != 2 || err != nil { + t.Error("Problem with search 3") + } + if i, err := search("タ", s); i != 3 || err != nil { + t.Error("Problem with search 4") + } + if _, err := search("|", s); err == nil { + t.Error("Problem with search 5") + } +} + +func TestNewIndexLookup(t *testing.T) { + var testCases = []struct { + slice Slice + lookup map[interface{}]int + }{ + { + slice: IntSlice{1, 2, 3}, + lookup: map[interface{}]int{ + 1: 0, + 2: 1, + 3: 2, + }, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var lookup = newIndexLookup(tc.slice) + for k, v := range lookup { + if v != tc.lookup[k] { + t.Error("createLookup didn't work as expected") + } + } + }) + } +} + +func TestGetCycles(t *testing.T) { + var testCases = []struct { + x []int + y []int + cycles [][]int + }{ + { + x: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + y: []int{9, 3, 7, 8, 2, 6, 5, 1, 4}, + cycles: [][]int{ + []int{0, 8, 3, 7}, + []int{1, 2, 6, 4}, + []int{5}, + }, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var cycles = getCycles(IntSlice(tc.x), IntSlice(tc.y)) + for i, cycle := range cycles { + for j, c := range cycle { + if c != tc.cycles[i][j] { + t.Error("getCycles didn't work as expected") + } + } + } + }) + } +} + +func TestGetNeighbours(t *testing.T) { + var testCases = []struct { + x Slice + neighbours map[interface{}]set + }{ + { + x: IntSlice{1, 2, 3, 4, 5, 6, 7, 8, 9}, + neighbours: map[interface{}]set{ + 1: set{9: true, 2: true}, + 2: set{1: true, 3: true}, + 3: set{2: true, 4: true}, + 4: set{3: true, 5: true}, + 5: set{4: true, 6: true}, + 6: set{5: true, 7: true}, + 7: set{6: true, 8: true}, + 8: set{7: true, 9: true}, + 9: set{8: true, 1: true}, + }, + }, + } + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var neighbours = getNeighbours(tc.x) + for i, set := range neighbours { + for j := range set { + if !tc.neighbours[i][j] { + t.Error("getNeighbours didn't work as expected") + } + } + } + }) + } +} + +func TestIntSliceAt(t *testing.T) { + var ints = IntSlice{1, 2, 3} + if ints.At(0) != 1 || ints.At(1) != 2 || ints.At(2) != 3 { + t.Error("IntSlice At method has unexpected behavior") + } +} + +func TestIntSliceLen(t *testing.T) { + var ints = IntSlice{1, 2, 3} + if ints.Len() != 3 { + t.Error("IntSlice Len method has unexpected behavior") + } +} + +func TestIntSliceSwap(t *testing.T) { + var ints = IntSlice{1, 2, 3} + ints.Swap(0, 2) + if ints.At(0) != 3 || ints.At(2) != 1 { + t.Error("IntSlice Swap method has unexpected behavior") + } +} + +func TestIntSliceSlice(t *testing.T) { + var ints = IntSlice{1, 2, 3}.Slice(1, 2) + if ints.Len() != 1 || ints.At(0) != 2 { + t.Error("IntSlice Slice method has unexpected behavior") + } +} + +func TestIntSliceSplit(t *testing.T) { + var a, b = IntSlice{1, 2, 3}.Split(1) + if a.Len() != 1 || b.Len() != 2 || a.At(0) != 1 || b.At(0) != 2 || b.At(1) != 3 { + t.Error("IntSlice Split method has unexpected behavior") + } +} + +func TestIntSliceAppend(t *testing.T) { + var ints = IntSlice{1}.Append(IntSlice{2}) + if ints.Len() != 2 || ints.At(0) != 1 || ints.At(1) != 2 { + t.Error("IntSlice Append method has unexpected behavior") + } +} + +func TestIntSliceReplace(t *testing.T) { + var ints = IntSlice{1} + ints.Replace(IntSlice{2}) + if ints.Len() != 1 || ints.At(0) != 2 { + t.Error("IntSlice Replace method has unexpected behavior") + } +} + +func TestIntSliceCopy(t *testing.T) { + var ( + ints = IntSlice{1} + clone = ints.Copy() + ) + clone.Replace(IntSlice{2}) + if ints.At(0) != 1 { + t.Error("IntSlice Copy method has unexpected behavior") + } +} + +func TestFloat64SliceAt(t *testing.T) { + var floats = Float64Slice{1, 2, 3} + if floats.At(0) != 1.0 || floats.At(1) != 2.0 || floats.At(2) != 3.0 { + t.Error("Float64Slice At method has unexpected behavior") + } +} + +func TestFloat64SliceLen(t *testing.T) { + var floats = Float64Slice{1, 2, 3} + if floats.Len() != 3 { + t.Error("Float64Slice Len method has unexpected behavior") + } +} + +func TestFloat64SliceSwap(t *testing.T) { + var floats = Float64Slice{1, 2, 3} + floats.Swap(0, 2) + if floats.At(0) != 3.0 || floats.At(2) != 1.0 { + t.Error("Float64Slice Swap method has unexpected behavior") + } +} + +func TestFloat64SliceSlice(t *testing.T) { + var floats = Float64Slice{1, 2, 3}.Slice(1, 2) + if floats.Len() != 1 || floats.At(0) != 2.0 { + t.Error("Float64Slice Slice method has unexpected behavior") + } +} + +func TestFloat64SliceSplit(t *testing.T) { + var a, b = Float64Slice{1, 2, 3}.Split(1) + if a.Len() != 1 || b.Len() != 2 || a.At(0) != 1.0 || b.At(0) != 2.0 || b.At(1) != 3.0 { + t.Error("Float64Slice Split method has unexpected behavior") + } +} + +func TestFloat64SliceAppend(t *testing.T) { + var floats = Float64Slice{1}.Append(Float64Slice{2}) + if floats.Len() != 2 || floats.At(0) != 1.0 || floats.At(1) != 2.0 { + t.Error("Float64Slice Append method has unexpected behavior") + } +} + +func TestFloat64SliceReplace(t *testing.T) { + var floats = Float64Slice{1} + floats.Replace(Float64Slice{2}) + if floats.Len() != 1 || floats.At(0) != 2.0 { + t.Error("Float64Slice Replace method has unexpected behavior") + } +} + +func TestFloat64SliceCopy(t *testing.T) { + var ( + floats = Float64Slice{1} + clone = floats.Copy() + ) + clone.Replace(Float64Slice{2}) + if floats.At(0) != 1.0 { + t.Error("IntSlice Copy method has unexpected behavior") + } +} + +func TestStringSliceAt(t *testing.T) { + var strings = StringSlice{"a", "b", "c"} + if strings.At(0) != "a" || strings.At(1) != "b" || strings.At(2) != "c" { + t.Error("StringSlice At method has unexpected behavior") + } +} + +func TestStringSliceLen(t *testing.T) { + var strings = StringSlice{"a", "b", "c"} + if strings.Len() != 3 { + t.Error("StringSlice Len method has unexpected behavior") + } +} + +func TestStringSliceSwap(t *testing.T) { + var strings = StringSlice{"a", "b", "c"} + strings.Swap(0, 2) + if strings.At(0) != "c" || strings.At(2) != "a" { + t.Error("StringSlice Swap method has unexpected behavior") + } +} + +func TestStringSliceSlice(t *testing.T) { + var strings = StringSlice{"a", "b", "c"}.Slice(1, 2) + if strings.Len() != 1 || strings.At(0) != "b" { + t.Error("StringSlice Slice method has unexpected behavior") + } +} + +func TestStringSliceSplit(t *testing.T) { + var a, b = StringSlice{"a", "b", "c"}.Split(1) + if a.Len() != 1 || b.Len() != 2 || a.At(0) != "a" || b.At(0) != "b" || b.At(1) != "c" { + t.Error("StringSlice Split method has unexpected behavior") + } +} + +func TestStringSliceAppend(t *testing.T) { + var strings = StringSlice{"a"}.Append(StringSlice{"b"}) + if strings.Len() != 2 || strings.At(0) != "a" || strings.At(1) != "b" { + t.Error("StringSlice Append method has unexpected behavior") + } +} + +func TestStringSliceReplace(t *testing.T) { + var strings = StringSlice{"a"} + strings.Replace(StringSlice{"b"}) + if strings.Len() != 1 || strings.At(0) != "b" { + t.Error("StringSlice Replace method has unexpected behavior") + } +} + +func TestStringSliceCopy(t *testing.T) { + var ( + strings = StringSlice{"a"} + clone = strings.Copy() + ) + clone.Replace(StringSlice{"b"}) + if strings.At(0) == "b" { + t.Error("StringSlice Copy method has unexpected behavior") + } +} diff --git a/speciation.go b/speciation.go new file mode 100644 index 0000000..c514e7c --- /dev/null +++ b/speciation.go @@ -0,0 +1,138 @@ +package gago + +import ( + "errors" + "fmt" + "math" + "math/rand" +) + +// A Speciator partitions a population into n smaller subpopulations. Each +// subpopulation shares the same random number generator inherited from the +// initial population. +type Speciator interface { + Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) + Validate() error +} + +// SpecKMedoids (k-medoid clustering). +type SpecKMedoids struct { + K int // Number of medoids + MinPerCluster int + Metric Metric // Dissimimilarity measure + MaxIterations int +} + +// Apply SpecKMedoids. +func (spec SpecKMedoids) Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) { + // Check there are at least K Individuals + if len(indis) < spec.K { + return nil, fmt.Errorf("SpecKMedoids: have %d individuals and need at least %d", + len(indis), spec.K) + } + var ( + species = make([]Individuals, spec.K) + medoids = make(Individuals, spec.K) + dm = newDistanceMemoizer(spec.Metric) + ) + // Initialize the clusters with the individuals having the lowest average + // distances with the other individuals + indis.SortByDistanceToMedoid(dm) + copy(medoids, indis[:spec.K]) + // Add each medoid to a cluster + for i, medoid := range medoids { + species[i] = append(species[i], medoid) + } + // Keep track of the total distance from the medoid to each of the cluster's + // members, this total will then be used to compare with different cluster + // dispositions + var total float64 + // Assign each individual that is not a medoid to the closest initial medoid + for _, indi := range indis[spec.K:] { + var i = indi.IdxOfClosest(medoids, dm) + species[i] = append(species[i], indi) + total += dm.GetDistance(medoids[i], indi) + } + var nIterations int + for nIterations < spec.MaxIterations { + nIterations++ + var ( + newSpecies = make([]Individuals, len(species)) + newTotal float64 + ) + // Recompute the new medoid inside each specie + for i, specie := range species { + specie.SortByDistanceToMedoid(dm) + medoids[i] = specie[0] + newSpecies[i] = append(newSpecies[i], specie[0]) + } + // Reassign each individual to the closest new medoid + for _, specie := range species { + for _, indi := range specie[1:] { + var i = indi.IdxOfClosest(medoids, dm) + newSpecies[i] = append(newSpecies[i], indi) + newTotal += dm.GetDistance(medoids[i], indi) + } + } + // No more iterations are needed if the new total is worse + if newTotal >= total { + break + } + copy(species, newSpecies) + total = newTotal + } + // Rebalance the species so that their are at least + rebalanceClusters(species, dm, spec.MinPerCluster) + return species, nil +} + +// Validate SpecKMedoids fields. +func (spec SpecKMedoids) Validate() error { + if spec.K < 2 { + return errors.New("K should be higher than 1") + } + if spec.Metric == nil { + return errors.New("Metric field has to be provided") + } + if spec.MaxIterations < 1 { + return errors.New("K should be higher than 0") + } + return nil +} + +// SpecFitnessInterval speciates a population based on the fitness of each +// individual where each species contains m = n/k (rounded to the closest upper +// integer) individuals with similar fitnesses. For example, with 4 species, 30 +// individuals would be split into 3 groups of 8 individuals and 1 group of 6 +// individuals (3*8 + 1*6 = 30). More generally each group is of size +// min(n-i, m) where i is a multiple of m. +type SpecFitnessInterval struct { + K int // Number of intervals +} + +// Apply SpecFitnessInterval. +func (spec SpecFitnessInterval) Apply(indis Individuals, rng *rand.Rand) ([]Individuals, error) { + // Check there are at least K Individuals + if len(indis) < spec.K { + return nil, fmt.Errorf("SpecFitnessInterval: have %d individuals and need at least %d", + len(indis), spec.K) + } + var ( + species = make([]Individuals, spec.K) + n = len(indis) + m = min(int(math.Ceil(float64(n/spec.K))), n) + ) + for i := range species { + var a, b = i * m, min((i+1)*m, n) + species[i] = indis[a:b] + } + return species, nil +} + +// Validate SpecFitnessInterval fields. +func (spec SpecFitnessInterval) Validate() error { + if spec.K < 2 { + return errors.New("K should be higher than 1") + } + return nil +} diff --git a/speciation_test.go b/speciation_test.go new file mode 100644 index 0000000..29d258a --- /dev/null +++ b/speciation_test.go @@ -0,0 +1,143 @@ +package gago + +import ( + "errors" + "fmt" + "math" + "testing" +) + +func TestSpecKMedoidsApply(t *testing.T) { + var ( + rng = newRand() + testCases = []struct { + indis Individuals + kmeds SpecKMedoids + speciesSizes []int + err error + }{ + // Example dataset from https://www.wikiwand.com/en/K-medoids + { + indis: Individuals{ + NewIndividual(Vector{2, 6}, rng), + NewIndividual(Vector{3, 4}, rng), + NewIndividual(Vector{3, 8}, rng), + NewIndividual(Vector{4, 7}, rng), + NewIndividual(Vector{6, 2}, rng), + NewIndividual(Vector{6, 4}, rng), + NewIndividual(Vector{7, 3}, rng), + NewIndividual(Vector{7, 4}, rng), + NewIndividual(Vector{8, 5}, rng), + NewIndividual(Vector{7, 6}, rng), + }, + kmeds: SpecKMedoids{2, 1, l1Distance, 10}, + speciesSizes: []int{4, 6}, + err: nil, + }, + { + indis: Individuals{ + NewIndividual(Vector{1, 1}, rng), + NewIndividual(Vector{1, 1}, rng), + }, + kmeds: SpecKMedoids{2, 1, l1Distance, 10}, + speciesSizes: []int{1, 1}, + err: nil, + }, + { + indis: Individuals{ + NewIndividual(Vector{1, 1}, rng), + NewIndividual(Vector{1, 2}, rng), + }, + kmeds: SpecKMedoids{3, 1, l1Distance, 10}, + speciesSizes: []int{1, 1}, + err: errors.New("K > len(indis)"), + }, + } + ) + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var species, err = tc.kmeds.Apply(tc.indis, rng) + // Check the number of species is correct + if err == nil && len(species) != tc.kmeds.K { + t.Error("Wrong number of species") + } + // Check size of each specie + if err == nil { + for j, specie := range species { + if len(specie) != tc.speciesSizes[j] { + t.Error("Wrong specie size") + } + } + } + // Check error is nil or not + if (err == nil) != (tc.err == nil) { + t.Error("Wrong error") + } + }) + } +} + +func TestSpecKMedoidsValidate(t *testing.T) { + var spec = SpecKMedoids{2, 1, l1Distance, 1} + if err := spec.Validate(); err != nil { + t.Error("Validation should not have raised error") + } + // Set K lower than 2 + spec.K = 1 + if err := spec.Validate(); err == nil { + t.Error("Validation should have raised error") + } + spec.K = 2 + // Nullify Metric + spec.Metric = nil + if err := spec.Validate(); err == nil { + t.Error("Validation should have raised error") + } + spec.Metric = l1Distance + // Set MaxIterations lower than 1 + spec.MaxIterations = 0 + if err := spec.Validate(); err == nil { + t.Error("Validation should have raised error") + } +} + +func TestSpecFitnessIntervalApply(t *testing.T) { + var ( + nIndividuals = []int{1, 2, 3} + nSpecies = []int{1, 2, 3} + rng = newRand() + ) + for _, nbi := range nIndividuals { + for _, nbs := range nSpecies { + var ( + m = min(int(math.Ceil(float64(nbi/nbs))), nbi) + indis = newIndividuals(nbi, NewVector, rng) + spec = SpecFitnessInterval{K: nbs} + species, _ = spec.Apply(indis, rng) + ) + // Check the cluster sizes are equal to min(n-i, m) where i is a + // multiple of m + for i, specie := range species { + var ( + expected = min(nbi-i*m, m) + obtained = len(specie) + ) + if obtained != expected { + t.Errorf("Wrong number of individuals, expected %d got %d", expected, obtained) + } + } + } + } +} + +func TestSpecFitnessIntervalValidate(t *testing.T) { + var spec = SpecFitnessInterval{2} + if err := spec.Validate(); err != nil { + t.Error("Validation should not have raised error") + } + // Set K lower than 2 + spec.K = 1 + if err := spec.Validate(); err == nil { + t.Error("Validation should have raised error") + } +} diff --git a/util.go b/util.go new file mode 100644 index 0000000..7d220e9 --- /dev/null +++ b/util.go @@ -0,0 +1,116 @@ +package gago + +import ( + "math" +) + +func newInts(n int) []int { + var ints = make([]int, n) + for i := range ints { + ints[i] = i + } + return ints +} + +// Divide each element in a float64 slice by a given value. +func divide(floats []float64, value float64) []float64 { + var divided = make([]float64, len(floats)) + for i, v := range floats { + divided[i] = v / value + } + return divided +} + +// Compute the cumulative sum of a float64 slice. +func cumsum(floats []float64) []float64 { + var summed = make([]float64, len(floats)) + copy(summed, floats) + for i := 1; i < len(summed); i++ { + summed[i] += summed[i-1] + } + return summed +} + +// Find the minimum between two ints. +func min(a, b int) int { + if a <= b { + return a + } + return b +} + +// Compute the sum of an int slice. +func sumInts(ints []int) (sum int) { + for _, v := range ints { + sum += v + } + return +} + +// Compute the sum of a float64 slice. +func sumFloat64s(floats []float64) (sum float64) { + for _, v := range floats { + sum += v + } + return +} + +// Compute the minimum value of a float64 slice. +func minFloat64s(floats []float64) (min float64) { + min = math.Inf(1) + for _, f := range floats { + if f < min { + min = f + } + } + return +} + +// Compute the maximum value of a float64 slice. +func maxFloat64s(floats []float64) (max float64) { + max = math.Inf(-1) + for _, f := range floats { + if f > max { + max = f + } + } + return +} + +// Compute the mean of a float64 slice. +func meanFloat64s(floats []float64) float64 { + return sumFloat64s(floats) / float64(len(floats)) +} + +// Compute the variance of a float64 slice. +func varianceFloat64s(floats []float64) float64 { + var ( + m = meanFloat64s(floats) + ss float64 + ) + for _, x := range floats { + ss += math.Pow(x-m, 2) + } + return ss / float64(len(floats)) +} + +type set map[interface{}]bool + +// union merges two slices and ignores duplicates. +func union(x, y set) set { + var ( + u = make(set) + blackList = make(map[interface{}]bool) + ) + for i := range x { + u[i] = true + blackList[i] = true + } + for i := range y { + if !blackList[i] { + u[i] = true + blackList[i] = true + } + } + return u +} diff --git a/util_random.go b/util_random.go new file mode 100644 index 0000000..e8b0671 --- /dev/null +++ b/util_random.go @@ -0,0 +1,75 @@ +package gago + +import ( + "fmt" + "math/rand" +) + +// Sample k unique integers in range [min, max) using reservoir sampling, +// specifically Vitter's Algorithm R. +func randomInts(k, min, max int, rng *rand.Rand) (ints []int) { + ints = make([]int, k) + for i := 0; i < k; i++ { + ints[i] = i + min + } + for i := k; i < max-min; i++ { + var j = rng.Intn(i + 1) + if j < k { + ints[j] = i + min + } + } + return +} + +// Sample k unique integers from a slice of n integers without replacement. +func sampleInts(ints []int, k int, rng *rand.Rand) ([]int, []int, error) { + if k > len(ints) { + return nil, nil, fmt.Errorf("Cannot sample %d elements from array of length %d", k, len(ints)) + } + var ( + sample = make([]int, k) + idxs = make([]int, k) + ) + for i, idx := range randomInts(k, 0, len(ints), rng) { + sample[i] = ints[idx] + idxs[i] = idx + } + return sample, idxs, nil +} + +// Generate random weights that sum up to 1. +func randomWeights(size int) []float64 { + var ( + weights = make([]float64, size) + total float64 + ) + for i := range weights { + weights[i] = rand.Float64() + total += weights[i] + } + var normalized = divide(weights, total) + return normalized +} + +const ( + letterBytes = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" + letterIdxBits = 6 // 6 bits to represent a letter index + letterIdxMask = 1<= 0; { + if remain == 0 { + cache, remain = rng.Int63(), letterIdxMax + } + if idx := int(cache & letterIdxMask); idx < len(letterBytes) { + b[i] = letterBytes[idx] + i-- + } + cache >>= letterIdxBits + remain-- + } + return string(b) +} diff --git a/util_random_test.go b/util_random_test.go new file mode 100644 index 0000000..d8fb9cd --- /dev/null +++ b/util_random_test.go @@ -0,0 +1,116 @@ +package gago + +import ( + "errors" + "fmt" + "math" + "math/rand" + "testing" + "time" +) + +func TestRandomInts(t *testing.T) { + var ( + src = rand.NewSource(time.Now().UnixNano()) + rng = rand.New(src) + testCases = []struct { + k, min, max int + }{ + {1, 0, 1}, + {1, 0, 2}, + {2, 0, 2}, + } + ) + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var ints = randomInts(tc.k, tc.min, tc.max, rng) + // Check the number of generated integers + if len(ints) != tc.k { + t.Error("randomInts didn't generate the right number of integers") + } + // Check the bounds of each generated integer + for _, integer := range ints { + if integer < tc.min || integer >= tc.max { + t.Error("randomInts didn't generate integers in the desired range") + } + } + // Check the generated integers are unique + for i, a := range ints { + for j, b := range ints { + if i != j && a == b { + t.Error("randomInts didn't generate unique integers") + } + } + } + }) + } +} + +func TestSampleInts(t *testing.T) { + var testCases = []struct { + ints []int + k int + err error + }{ + { + ints: []int{1, 2, 3}, + k: 0, + err: nil, + }, + { + ints: []int{1, 2, 3}, + k: 1, + err: nil, + }, + { + ints: []int{1, 2, 3}, + k: 2, + err: nil, + }, + { + ints: []int{1, 2, 3}, + k: 3, + err: nil, + }, + { + ints: []int{1, 2, 3}, + k: 4, + err: errors.New("k > len(ints)"), + }, + } + var rng = newRand() + for i, tc := range testCases { + t.Run(fmt.Sprintf("TC %d", i), func(t *testing.T) { + var ints, idxs, err = sampleInts(tc.ints, tc.k, rng) + if (err == nil) != (tc.err == nil) { + t.Error("Error") + } else { + if err == nil && (len(ints) != tc.k || len(idxs) != tc.k) { + t.Error("Error") + } + } + }) + } +} + +func TestRandomWeights(t *testing.T) { + var ( + sizes = []int{1, 30, 500} + limit = math.Pow(1, -10) + ) + for _, size := range sizes { + var weights = randomWeights(size) + // Test the length of the resulting slice + if len(weights) != size { + t.Error("Size problem with randomWeights") + } + // Test the elements in the slice sum up to 1 + var sum float64 + for _, weight := range weights { + sum += weight + } + if math.Abs(sum-1.0) > limit { + t.Error("Sum problem with randomWeights") + } + } +} diff --git a/util_test.go b/util_test.go new file mode 100644 index 0000000..930f11f --- /dev/null +++ b/util_test.go @@ -0,0 +1,163 @@ +package gago + +import ( + "testing" +) + +func TestMin(t *testing.T) { + var testCases = [][]int{ + []int{1, 2}, + []int{2, 2}, + []int{2, 3}, + } + for _, test := range testCases { + if min(test[0], test[1]) != test[0] { + t.Error("min didn't find the smallest integer") + } + } +} + +func TestSumFloat64s(t *testing.T) { + var testCases = []struct { + floats []float64 + total float64 + }{ + {[]float64{1, 2, 3}, 6}, + {[]float64{-1, 1}, 0}, + {[]float64{1.42, 42.1}, 43.52}, + } + for _, test := range testCases { + if sumFloat64s(test.floats) != test.total { + t.Error("sumFloat64s didn't work as expected") + } + } +} + +func TestMinFloat64s(t *testing.T) { + var testCases = []struct { + floats []float64 + min float64 + }{ + {[]float64{1.0}, 1.0}, + {[]float64{1.0, 2.0}, 1.0}, + {[]float64{-1.0, 1.0}, -1.0}, + } + for _, test := range testCases { + if minFloat64s(test.floats) != test.min { + t.Error("meanFloat64s didn't work as expected") + } + } +} + +func TestMaxFloat64s(t *testing.T) { + var testCases = []struct { + floats []float64 + max float64 + }{ + {[]float64{1.0}, 1.0}, + {[]float64{1.0, 2.0}, 2.0}, + {[]float64{-1.0, 1.0}, 1.0}, + } + for _, test := range testCases { + if maxFloat64s(test.floats) != test.max { + t.Error("maxFloat64s didn't work as expected") + } + } +} + +func TestMeanFloat64s(t *testing.T) { + var testCases = []struct { + floats []float64 + mean float64 + }{ + {[]float64{1.0}, 1.0}, + {[]float64{1.0, 2.0}, 1.5}, + {[]float64{-1.0, 1.0}, 0.0}, + } + for _, test := range testCases { + if meanFloat64s(test.floats) != test.mean { + t.Error("meanFloat64s didn't work as expected") + } + } +} + +func TestVarianceFloat64s(t *testing.T) { + var testCases = []struct { + floats []float64 + variance float64 + }{ + {[]float64{1.0}, 0.0}, + {[]float64{-1.0, 1.0}, 1.0}, + {[]float64{-2.0, 2.0}, 4.0}, + } + for _, test := range testCases { + if varianceFloat64s(test.floats) != test.variance { + t.Error("varianceFloat64s didn't work as expected") + } + } +} + +func TestDivide(t *testing.T) { + var testCases = []struct { + floats []float64 + value float64 + divided []float64 + }{ + {[]float64{1, 1}, 1, []float64{1, 1}}, + {[]float64{1, 1}, 2, []float64{0.5, 0.5}}, + {[]float64{42, -42}, 21, []float64{2, -2}}, + } + for _, test := range testCases { + var divided = divide(test.floats, test.value) + for i := range divided { + if divided[i] != test.divided[i] { + t.Error("divided didn't work as expected") + } + } + } +} + +func TestCumsum(t *testing.T) { + var testCases = []struct { + floats []float64 + summed []float64 + }{ + {[]float64{1, 2, 3, 4}, []float64{1, 3, 6, 10}}, + {[]float64{-1, 0, 1}, []float64{-1, -1, 0}}, + } + for _, test := range testCases { + var summed = cumsum(test.floats) + for i := range summed { + if summed[i] != test.summed[i] { + t.Error("cumsum didn't work as expected") + } + } + } +} + +func TestUnion(t *testing.T) { + var testCases = []struct { + x set + y set + u set + }{ + { + x: set{1: true, 2: true, 3: true}, + y: set{4: true, 5: true, 6: true}, + u: set{1: true, 2: true, 3: true, 4: true, 5: true, 6: true}, + }, + { + x: set{1: true, 2: true, 3: true}, + y: set{2: true, 3: true, 4: true}, + u: set{1: true, 2: true, 3: true, 4: true}, + }, + } + for _, test := range testCases { + var u = union(test.x, test.y) + for i := range u { + if !test.u[i] { + t.Error("union didn't work as expected") + } + } + } +}