From d40ffe59a76701429a69cbcb56c816994d6c52b7 Mon Sep 17 00:00:00 2001
From: Carsten Wenderdel
Date: Thu, 5 Oct 2023 21:23:16 +0200
Subject: [PATCH] Document code structure

---
 Cargo.lock                 |  1 -
 README.md                  | 39 ++++++++++------------------
 crates/wildbg-c/Cargo.toml |  1 -
 docs/dev/architecture.md   | 53 ++++++++++++++++++++++++++++++++++++++
 4 files changed, 67 insertions(+), 27 deletions(-)
 create mode 100644 docs/dev/architecture.md

diff --git a/Cargo.lock b/Cargo.lock
index 7a97111..efb6592 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3163,7 +3163,6 @@ checksum = "14247bb57be4f377dfb94c72830b8ce8fc6beac03cf4bf7b9732eadd414123fc"
 name = "wildbg-c"
 version = "0.0.0"
 dependencies = [
- "engine",
  "logic",
 ]
 
diff --git a/README.md b/README.md
index af642fa..7b70e34 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # wildbg
 
-`wildbg` is a backgammon engine based on neural networks. Currently, it's in a pre-alpha phase.
+`wildbg` is a backgammon engine based on neural networks. Currently, it's in an alpha stage.
 
 ## Try it out
 
@@ -15,43 +15,32 @@ An example for the starting position and rolling 3 and 1: https://wildbg.shuttle
 Install Rust on your machine and then execute `cargo run` or `cargo run --release`. A web server will be started which you can access via http://localhost:8080/swagger-ui/
 
+Beware that the network committed to this repository is a bit older and weaker.
+You can find the latest training progress and networks here: https://github.com/carsten-wenderdel/wildbg-training
+
 ## Goals
 
 1. Provide source code and documentation to train neural nets from zero to super human strength.
 2. Implement logic to evaluate all kind of backgammon positions: cubeless and cubeful equities, multi-ply evaluation, rollouts, etc.
-3. Make the backgammon engine accessible via an easy-to-use HTTP/json API.
+3. Make the backgammon engine accessible via an easy-to-use HTTP JSON API.
 
 A graphical user interface (GUI) is not part of this project.
 
-## Current state
-
-#### Topic 1: Neural nets
+## Training process
 
 The training process consists of three steps, which are repeated in a loop:
-1. Find lots of positions for a later rollout. Currently, rather random self play is used; later we want to make sure to find all kind of positions, including backgames.
+1. Find lots of positions (at least 100,000) through self-play for a later rollout.
 2. Roll out these positions. Currently, only 1-ply rollouts are possible.
-3. Train neural networks based on the rollout data. Currently, a single net with several hidden layers is supported; later different nets for different game phases are planned.
-
-Already implemented is:
-* Roll out a certain position 1296 times, multithreaded.
-* 202 neural net inputs similar to TD-Gammon, representing the raw board.
-* Find 100,000 random positions through self play for later rollouts.
-* Train a single neural net with one hidden layer via PyTorch and save the result as an ONNX file. The net has six outputs for winning/losing 1, 2 or 3 points.
-* Inference of that neural net in Rust via [tract](https://github.com/sonos/tract).
-
-An older, weaker neural network is committed to this repository. You can find the latest training progress and networks here: https://github.com/carsten-wenderdel/wildbg-training
-
-#### Topic 2: Backgammon logic
-Currently only cubeless equities and moves are implemented. Cubes and cubeful equities are missing.
-
-#### Topic 3: HTTP/json API
+3. Train neural networks based on the rollout data. Currently, a single net with several hidden layers is supported; later different nets for different game phases are planned.
 
 This third step is the only one done in Python, everything else is implemented in Rust.
 
-Getting the best move is already implemented: https://wildbg.shuttleapp.rs/swagger-ui/
+## Documentation
 
-### Installation of python environment
+#### For users (bots and GUIs)
+- HTTP API: https://wildbg.shuttleapp.rs/swagger-ui/
+- C API: [docs/user/wildbg-c.md](docs/user/wildbg-c.md)
 
-Make an editable install by targeting the training directory:
-``` pip install -e "training/[dev]"```
+#### For contributors
+- Code structure: [docs/dev/architecture.md](docs/dev/architecture.md)
 
 ## Contributing
diff --git a/crates/wildbg-c/Cargo.toml b/crates/wildbg-c/Cargo.toml
index 07c93bd..3b0fef7 100644
--- a/crates/wildbg-c/Cargo.toml
+++ b/crates/wildbg-c/Cargo.toml
@@ -10,5 +10,4 @@ crate-type = ["staticlib"]
 
 [dependencies]
 # internal
-engine = { path = "../engine" }
 logic = { path = "../logic" }
diff --git a/docs/dev/architecture.md b/docs/dev/architecture.md
new file mode 100644
index 0000000..df937d9
--- /dev/null
+++ b/docs/dev/architecture.md
@@ -0,0 +1,53 @@
+# Internal Architecture
+
+The whole project is split into a Python part and a Rust part. The Python part is responsible for training the neural nets, which takes only minutes once training data is available.
+The Rust part is responsible for everything else: both the engine itself and the generation of training data through rollouts based on older nets. Generating training data takes several days for each iteration.
+
+## Rust crates
+The Rust code is split into 5 different crates.
+
+```mermaid
+flowchart
+    coach ---> engine
+    wildbg-c --> logic
+    logic --> engine
+    web ---> engine
+    web --> logic
+```
+
+### engine
+[`engine`](../../crates/engine) contains the core parts:
+- Move generation: given a position and a pair of dice – what are the legal moves and resulting positions?
+- Inputs generation: given a position – what are the proper inputs for the neural networks?
+- Inference of the neural net: given the proper inputs – what are the cubeless money game probabilities for winning/losing normal/gammon/backgammon?
+- Best move: given a position and a pair of dice – what is the best move to play?
+
+### coach
+[`coach`](../../crates/coach) contains everything to generate training data and compare the performance of different neural networks.
+
+This is achieved by finding many (more than 100,000) positions through self-play and subsequently rolling out these positions.
+
+We then have a large set of positions and probabilities; in other words, a large training set of inputs and outputs for the neural nets.
+
+### logic
+
+While `engine` only deals with cubeless equities, [`logic`](../../crates/logic) handles cubeful equities, match play and cubes. This is still largely work in progress.
+
+### wildbg-c
+
+[`wildbg-c`](../../crates/wildbg-c) is a small layer on top of `logic` that allows C code to access `wildbg`.
+
+### web
+
+[`web`](../../crates/web) contains the HTTP JSON API for bots and GUIs to access the user-facing features.
+
+The OpenAPI documentation is autogenerated by the Rust code and can be accessed here: https://wildbg.shuttleapp.rs/swagger-ui/
+
+Currently `web` depends on both `logic` and `engine`. In the future we might clean this up and remove the direct dependency on `engine`.
+
+## Python training
+
+The [training](../../training) folder contains Python code for training the neural networks with PyTorch.
+
+Training data first needs to be generated with [`coach`](#coach). Existing training data can be found in the external repository https://github.com/carsten-wenderdel/wildbg-training
+
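The `engine` section of the new architecture document describes a pipeline of move generation, input generation, inference and best-move selection. Below is a minimal Rust sketch of that flow, assuming simplified, hypothetical types (`Position`, `Probabilities`, `Evaluator`); the names and signatures are chosen only for illustration and are not meant to reflect the real types in the `engine` crate, and details such as switching perspective between players after a move are omitted.

```rust
// Hypothetical sketch only: names and signatures are illustrative, not the actual wildbg API.

/// Cubeless money game probabilities: win/lose normal, gammon, backgammon.
#[derive(Clone, Copy, Debug)]
struct Probabilities {
    win_normal: f32,
    win_gammon: f32,
    win_bg: f32,
    lose_normal: f32,
    lose_gammon: f32,
    lose_bg: f32,
}

impl Probabilities {
    /// Cubeless money game equity: gammons count double, backgammons triple.
    fn equity(&self) -> f32 {
        (self.win_normal + 2.0 * self.win_gammon + 3.0 * self.win_bg)
            - (self.lose_normal + 2.0 * self.lose_gammon + 3.0 * self.lose_bg)
    }
}

/// Placeholder for a board position; a real engine stores checkers per point.
#[derive(Clone, Debug)]
struct Position;

impl Position {
    /// Move generation: all legal positions reachable with the given dice.
    fn all_positions_after_moving(&self, _dice: (u8, u8)) -> Vec<Position> {
        vec![Position] // placeholder: a real implementation enumerates legal moves
    }
}

/// Inference: turn a position into net inputs and return outcome probabilities.
trait Evaluator {
    fn eval(&self, position: &Position) -> Probabilities;

    /// Best move: evaluate every legal follow-up position and keep the best one.
    /// (A real engine also flips the perspective after moving; omitted here.)
    fn best_position(&self, position: &Position, dice: (u8, u8)) -> Position {
        position
            .all_positions_after_moving(dice)
            .into_iter()
            .max_by(|a, b| self.eval(a).equity().total_cmp(&self.eval(b).equity()))
            .expect("there is always at least one legal follow-up position")
    }
}

/// Dummy evaluator returning constants, so the sketch runs without a neural net.
struct ConstantEvaluator;

impl Evaluator for ConstantEvaluator {
    fn eval(&self, _position: &Position) -> Probabilities {
        Probabilities {
            win_normal: 0.3,
            win_gammon: 0.1,
            win_bg: 0.05,
            lose_normal: 0.35,
            lose_gammon: 0.15,
            lose_bg: 0.05,
        }
    }
}

fn main() {
    let evaluator = ConstantEvaluator;
    let best = evaluator.best_position(&Position, (3, 1));
    println!("best follow-up position: {best:?}");
}
```

In the real crates the evaluator's probabilities come from ONNX inference rather than constants (the old README text removed above mentions tract for that), but the split of responsibilities is the same idea.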
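The training loop described in the README hands data from the Rust side (`coach`) to the Python side (PyTorch): positions are found through self-play, rolled out, and the resulting inputs and probabilities are written to disk. The sketch below shows one plausible way to serialize such rows; the file name, column order and separator are assumptions made for this example and are not the actual format used by `coach`.

```rust
use std::fs::File;
use std::io::{BufWriter, Write};

/// Write one training example: six outcome probabilities followed by the
/// neural net inputs of the rolled-out position, separated by semicolons.
fn write_training_row(
    out: &mut impl Write,
    probabilities: &[f32; 6],
    inputs: &[f32],
) -> std::io::Result<()> {
    let columns: Vec<String> = probabilities
        .iter()
        .chain(inputs.iter())
        .map(|value| value.to_string())
        .collect();
    writeln!(out, "{}", columns.join(";"))
}

fn main() -> std::io::Result<()> {
    // Hypothetical file name; the real rollout data lives in the external
    // wildbg-training repository linked above.
    let file = File::create("rollout-data.csv")?;
    let mut writer = BufWriter::new(file);

    // In the real loop, `coach` finds positions through self-play and rolls
    // each one out before writing a row; here we write a single dummy row.
    let probabilities = [0.3, 0.1, 0.05, 0.35, 0.15, 0.05];
    let inputs = vec![1.0_f32, 0.0, 0.5];
    write_training_row(&mut writer, &probabilities, &inputs)?;

    writer.flush()
}
```

The Python training step would then read such a file, train the net and export an ONNX model that the Rust side loads again for the next iteration of the loop.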