diff --git a/CHANGELOG.md b/CHANGELOG.md index a33ea53..a60c4b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ ## Unreleased +## 1.1 - 2019-5-27 + +### Fixed +- the initial action value in MCTS code + + ## 1.1 - 2019-5-5 ### Fixed diff --git a/README.md b/README.md index 701d769..db164c3 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,11 @@ GPUがあれば、より高速に棋譜を生成できます。 CPUだと10倍から100倍遅くなりますが、将棋をプレイして楽しむことは可能です。 +# 重要なニュース + +release 1.2 では非常に重要な不具合修正がbin/aobazになされました。アップデートをお願い致します。 + + # AobaZero AobaZeroは、AlphaZeroの将棋の実験の追試を行うことを最終目的とした将棋AIプロジェクトです。 diff --git a/src/common/version.hpp b/src/common/version.hpp index 9c517fe..2c66d41 100644 --- a/src/common/version.hpp +++ b/src/common/version.hpp @@ -3,5 +3,5 @@ namespace Ver { constexpr unsigned char major = 1; constexpr unsigned char minor = 1; - constexpr unsigned short usi_engin = 5; + constexpr unsigned short usi_engin = 9; } diff --git a/src/usi_engine/bona/shogi.h b/src/usi_engine/bona/shogi.h index 961302f..9386d88 100644 --- a/src/usi_engine/bona/shogi.h +++ b/src/usi_engine/bona/shogi.h @@ -111,7 +111,8 @@ extern unsigned char ailast_one[512]; //#define BNZ_VER "5" // 20190324 //#define BNZ_VER "6" // 20190419 //#define BNZ_VER "7" // 20190420 -#define BNZ_VER "8" // 20190430 +//#define BNZ_VER "8" // 20190430 +#define BNZ_VER "9" // 20190527 #define BNZ_NAME "AobaZero" #define REP_MAX_PLY 32 diff --git a/src/usi_engine/bona/yss_net.cpp b/src/usi_engine/bona/yss_net.cpp index 74daa67..4c3e217 100644 --- a/src/usi_engine/bona/yss_net.cpp +++ b/src/usi_engine/bona/yss_net.cpp @@ -393,7 +393,7 @@ float get_network_policy_value(tree_t * restrict ptree, int sideToMove, int ply, } float mul = 1.0f; - if ( all_sum > legal_sum && legal_sum > 0 ) mul = all_sum / legal_sum; + if ( all_sum > legal_sum && legal_sum > 0 ) mul = 1.0f / legal_sum; for ( i = 0; i < phg->child_num; i++ ) { CHILD *pc = &phg->child[i]; if ( 1 && ply==1 && i < 30 ) { diff --git a/src/usi_engine/bona/ysszero.cpp 
b/src/usi_engine/bona/ysszero.cpp index eac9932..9c822d1 100644 --- a/src/usi_engine/bona/ysszero.cpp +++ b/src/usi_engine/bona/ysszero.cpp @@ -712,8 +712,8 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p if ( NOT_USE_NN ) { // softmax - const float temperature = 1.0f; - float max = -10000000; + const float temperature_inv = 1.0f; + float max = -10000000.0f; for (i=0; ichild[i]; if ( max < pc->bias ) max = pc->bias; @@ -721,7 +721,7 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p float sum = 0; for (i=0; ichild[i]; - pc->bias = (float)exp((pc->bias - max)*temperature); + pc->bias = std::exp((pc->bias - max)*temperature_inv); sum += pc->bias; } for(i=0; ichild[loop]; if ( pc->value == ILLEGAL_MOVE ) continue; - const double cBASE = 19652; + const double cBASE = 19652.0; const double cINIT = 1.25; - double c = log((1.0 + phg->games_sum + cBASE) / cBASE) + cINIT; // when 800 playout, cBASE has no effect. - double puct = c * pc->bias * sqrt((double)(phg->games_sum + 1.0)) / (pc->games + 1.0); // when games_sum = 0, +1.0 is necessary. (paper bug) - double uct_value = pc->value + puct; + // cBASE has little effect on the value of c if games_sum is + // sufficiently smaller than cBASE. + double c = (std::log((1.0 + phg->games_sum + cBASE) / cBASE) + + cINIT); + + // The number of visits to the parent is games_sum + 1. + // There may be a bug in pseudocode.py regarding this. + double puct = (c * pc->bias + * std::sqrt(static_cast(phg->games_sum + + 1)) + / static_cast(pc->games + 1)); + double mean_action_value = (pc->games == 0) ? -1.0 : pc->value; + + // We must multiply puct by two because the range of + // mean_action_value is [-1, 1] instead of [0, 1]. + double uct_value = mean_action_value + 2.0 * puct; // if ( depth==0 && phg->games_sum==500 ) PRT("%3d:v=%5.3f,p=%5.3f,u=%5.3f,g=%4d,s=%5d\n",loop,pc->value,puct,uct_value,pc->games,phg->games_sum); if ( uct_value > max_value ) {