Merge pull request #6 from kobanium/bugfix

bugfix (the initial action value of MCTS)
kobanium · May 28, 2019 · 564623f · 564623f
2 parents b1e4de5 + f9fdcc1
commit 564623f
Show file tree

Hide file tree

Showing 6 changed files with 35 additions and 10 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,12 @@
 
 ## Unreleased
 
+## 1.1 - 2019-5-27
+
+### Fixed
+- the initial action value in MCTS code
+
+
 ## 1.1 - 2019-5-5
 
 ### Fixed

diff --git a/README.md b/README.md
@@ -7,6 +7,11 @@
 GPUがあれば、より高速に棋譜を生成できます。
 CPUだと10倍から100倍遅くなりますが、将棋をプレイして楽しむことは可能です。
 
+# 重要なニュース
+
+release 1.2 では非常に重要な不具合修正がbin/aobazになされました。アップデートをお願い致します。
+
+
 # AobaZero
 
 AobaZeroは、AlphaZeroの将棋の実験の追試を行うことを最終目的とした将棋AIプロジェクトです。

diff --git a/src/common/version.hpp b/src/common/version.hpp
@@ -3,5 +3,5 @@
 namespace Ver {
   constexpr unsigned char major = 1;
   constexpr unsigned char minor = 1;
-  constexpr unsigned short usi_engin = 5;
+  constexpr unsigned short usi_engin = 9;
 }
diff --git a/src/usi_engine/bona/shogi.h b/src/usi_engine/bona/shogi.h
@@ -111,7 +111,8 @@ extern unsigned char ailast_one[512];
 //#define BNZ_VER                 "5"	// 20190324
 //#define BNZ_VER                 "6"	// 20190419
 //#define BNZ_VER                 "7"	// 20190420
-#define BNZ_VER                 "8"	// 20190430
+//#define BNZ_VER                 "8"	// 20190430
+#define BNZ_VER                 "9"	// 20190527
 #define BNZ_NAME                "AobaZero"
 
 #define REP_MAX_PLY             32

diff --git a/src/usi_engine/bona/yss_net.cpp b/src/usi_engine/bona/yss_net.cpp
@@ -393,7 +393,7 @@ float get_network_policy_value(tree_t * restrict ptree, int sideToMove, int ply,
 	}
 
 	float mul = 1.0f;
-	if ( all_sum > legal_sum && legal_sum > 0 ) mul = all_sum / legal_sum;
+	if ( all_sum > legal_sum && legal_sum > 0 ) mul = 1.0f / legal_sum;
 	for ( i = 0; i < phg->child_num; i++ ) {
 		CHILD *pc = &phg->child[i];
 		if ( 1 && ply==1 && i < 30 ) {

diff --git a/src/usi_engine/bona/ysszero.cpp b/src/usi_engine/bona/ysszero.cpp
@@ -712,16 +712,16 @@ void create_node(tree_t * restrict ptree, int sideToMove, int ply, HASH_SHOGI *p
 
 	if ( NOT_USE_NN ) {
 		// softmax
-		const float temperature = 1.0f;
-		float max = -10000000;
+		const float temperature_inv = 1.0f;
+		float max = -10000000.0f;
 		for (i=0; i<move_num; i++) {
 			CHILD *pc = &phg->child[i];
 			if ( max < pc->bias ) max = pc->bias;
 		}
 		float sum = 0;
 		for (i=0; i<move_num; i++) {
 			CHILD *pc = &phg->child[i];
-			pc->bias = (float)exp((pc->bias - max)*temperature);
+			pc->bias = std::exp((pc->bias - max)*temperature_inv);
 			sum += pc->bias;
 		}
 		for(i=0; i<move_num; i++){
@@ -789,11 +789,24 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply)
 		CHILD *pc  = &phg->child[loop];
 		if ( pc->value == ILLEGAL_MOVE ) continue;
 
-		const double cBASE = 19652;
+		const double cBASE = 19652.0;
 		const double cINIT = 1.25;
-		double c = log((1.0 + phg->games_sum + cBASE) / cBASE) + cINIT;	// when 800 playout, cBASE has no effect.
-		double puct = c * pc->bias * sqrt((double)(phg->games_sum + 1.0)) / (pc->games + 1.0);	// when games_sum = 0, +1.0 is necessary. (paper bug)
-		double uct_value = pc->value + puct;
+		// cBASE has little effect on the value of c if games_sum is
+		// sufficiently smaller than cBASE.
+		double c = (std::log((1.0 + phg->games_sum + cBASE) / cBASE)
+			    + cINIT);
+
+		// The number of visits to the parent is games_sum + 1.
+		// There may be a bug in pseudocode.py regarding this.
+		double puct = (c * pc->bias
+			       * std::sqrt(static_cast<double>(phg->games_sum
+							       + 1))
+			       / static_cast<double>(pc->games + 1));
+		double mean_action_value = (pc->games == 0) ? -1.0 : pc->value;
+
+		// We must multiply puct by two because the range of
+		// mean_action_value is [-1, 1] instead of [0, 1].
+		double uct_value = mean_action_value + 2.0 * puct;
 
 //		if ( depth==0 && phg->games_sum==500 ) PRT("%3d:v=%5.3f,p=%5.3f,u=%5.3f,g=%4d,s=%5d\n",loop,pc->value,puct,uct_value,pc->games,phg->games_sum);
 		if ( uct_value > max_value ) {