Skip to content

Commit

Permalink
Merge pull request #6 from kobanium/bugfix
Browse files Browse the repository at this point in the history
bugfix (the initial action value of MCTS)
  • Loading branch information
KunihitoHoki authored May 28, 2019
2 parents b1e4de5 + f9fdcc1 commit 564623f
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 10 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

## Unreleased

## 1.1 - 2019-5-27

### Fixed
- the initial action value in MCTS code


## 1.1 - 2019-5-5

### Fixed
Expand Down
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@
GPUがあれば、より高速に棋譜を生成できます。
CPUだと10倍から100倍遅くなりますが、将棋をプレイして楽しむことは可能です。

# 重要なニュース

release 1.2 では非常に重要な不具合修正がbin/aobazになされました。アップデートをお願い致します。


# AobaZero

AobaZeroは、AlphaZeroの将棋の実験の追試を行うことを最終目的とした将棋AIプロジェクトです。
Expand Down
2 changes: 1 addition & 1 deletion src/common/version.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
namespace Ver {
constexpr unsigned char major = 1;
constexpr unsigned char minor = 1;
constexpr unsigned short usi_engin = 5;
constexpr unsigned short usi_engin = 9;
}
3 changes: 2 additions & 1 deletion src/usi_engine/bona/shogi.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,8 @@ extern unsigned char ailast_one[512];
//#define BNZ_VER "5" // 20190324
//#define BNZ_VER "6" // 20190419
//#define BNZ_VER "7" // 20190420
#define BNZ_VER "8" // 20190430
//#define BNZ_VER "8" // 20190430
#define BNZ_VER "9" // 20190527
#define BNZ_NAME "AobaZero"

#define REP_MAX_PLY 32
Expand Down
2 changes: 1 addition & 1 deletion src/usi_engine/bona/yss_net.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ float get_network_policy_value(tree_t * restrict ptree, int sideToMove, int ply,
}

float mul = 1.0f;
if ( all_sum > legal_sum && legal_sum > 0 ) mul = all_sum / legal_sum;
if ( all_sum > legal_sum && legal_sum > 0 ) mul = 1.0f / legal_sum;
for ( i = 0; i < phg->child_num; i++ ) {
CHILD *pc = &phg->child[i];
if ( 1 && ply==1 && i < 30 ) {
Expand Down
27 changes: 20 additions & 7 deletions src/usi_engine/bona/ysszero.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -712,16 +712,16 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p

if ( NOT_USE_NN ) {
// softmax
const float temperature = 1.0f;
float max = -10000000;
const float temperature_inv = 1.0f;
float max = -10000000.0f;
for (i=0; i<move_num; i++) {
CHILD *pc = &phg->child[i];
if ( max < pc->bias ) max = pc->bias;
}
float sum = 0;
for (i=0; i<move_num; i++) {
CHILD *pc = &phg->child[i];
pc->bias = (float)exp((pc->bias - max)*temperature);
pc->bias = std::exp((pc->bias - max)*temperature_inv);
sum += pc->bias;
}
for(i=0; i<move_num; i++){
Expand Down Expand Up @@ -789,11 +789,24 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
CHILD *pc = &phg->child[loop];
if ( pc->value == ILLEGAL_MOVE ) continue;

const double cBASE = 19652;
const double cBASE = 19652.0;
const double cINIT = 1.25;
double c = log((1.0 + phg->games_sum + cBASE) / cBASE) + cINIT; // when 800 playout, cBASE has no effect.
double puct = c * pc->bias * sqrt((double)(phg->games_sum + 1.0)) / (pc->games + 1.0); // when games_sum = 0, +1.0 is necessary. (paper bug)
double uct_value = pc->value + puct;
// cBASE has little effect on the value of c if games_sum is
// sufficiently smaller than cBASE.
double c = (std::log((1.0 + phg->games_sum + cBASE) / cBASE)
+ cINIT);

// The number of visits to the parent is games_sum + 1.
// There may be a bug in pseudocode.py regarding this.
double puct = (c * pc->bias
* std::sqrt(static_cast<double>(phg->games_sum
+ 1))
/ static_cast<double>(pc->games + 1));
double mean_action_value = (pc->games == 0) ? -1.0 : pc->value;

// We must multiply puct by two because the range of
// mean_action_value is [-1, 1] instead of [0, 1].
double uct_value = mean_action_value + 2.0 * puct;

// if ( depth==0 && phg->games_sum==500 ) PRT("%3d:v=%5.3f,p=%5.3f,u=%5.3f,g=%4d,s=%5d\n",loop,pc->value,puct,uct_value,pc->games,phg->games_sum);
if ( uct_value > max_value ) {
Expand Down

0 comments on commit 564623f

Please sign in to comment.