From 91a9f2dbba8bc9fbe0306103a9460f18b5460214 Mon Sep 17 00:00:00 2001 From: yssaya Date: Sun, 21 Aug 2022 01:10:03 +0900 Subject: [PATCH 1/7] hcpe3 converter. --- learn/convert/hcpe3_to_csa.py | 115 ++++++++++++++++++++++++++++++++++ learn/convert/readme.txt | 4 ++ 2 files changed, 119 insertions(+) create mode 100644 learn/convert/hcpe3_to_csa.py create mode 100644 learn/convert/readme.txt diff --git a/learn/convert/hcpe3_to_csa.py b/learn/convert/hcpe3_to_csa.py new file mode 100644 index 0000000..6c0ea42 --- /dev/null +++ b/learn/convert/hcpe3_to_csa.py @@ -0,0 +1,115 @@ +from cshogi import * +from cshogi import CSA +import numpy as np +import sys +import os +import glob +import math +import argparse + +HuffmanCodedPosAndEval3 = np.dtype([ + ('hcp', dtypeHcp), # 開始局面 + ('moveNum', np.uint16), # 手数 + ('result', np.uint8), # 結果(xxxxxx11:勝敗、xxxxx1xx:千日手、xxxx1xxx:入玉宣言、xxx1xxxx:最大手数) + ('opponent', np.uint8), # 対戦相手(0:自己対局、1:先手usi、2:後手usi) + ]) +MoveInfo = np.dtype([ + ('selectedMove16', dtypeMove16), # 指し手 + ('eval', dtypeEval), # 評価値 + ('candidateNum', np.uint16), # 候補手の数 + ]) +MoveVisits = np.dtype([ + ('move16', dtypeMove16), # 候補手 + ('visitNum', np.uint16), # 訪問回数 + ]) + +ENDGAME_SYMBOLS = { + 1 : '%TORYO', + 2 : '%TORYO', + 4 : '%SENNICHITE', + 9 : '%KACHI', + 10: '%KACHI', + 16: '%CHUDAN', +} + +parser = argparse.ArgumentParser() +parser.add_argument('hcpe3') +parser.add_argument('csa') +parser.add_argument('--range') +parser.add_argument('--aoba', action='store_true') +parser.add_argument('--out_v', action='store_true') +parser.add_argument('--sort_visits', action='store_true') +args = parser.parse_args() + +f = open(args.hcpe3, 'rb') + +if args.aoba: + sep = ',' +else: + sep = '\n' + +if args.range: + start_end = args.range.split(':') + if len(start_end) == 1: + start = int(start_end[0]) + end = start + 1 + else: + if start_end[0] == '': + start = 0 + else: + start = int(start_end[0]) + if start_end[1] == '': + end = sys.maxsize + else: + end = int(start_end[1]) +else: + start = 0 + end = sys.maxsize + +board = Board() +csa = CSA.Exporter(args.csa) +p = 0 +while p < end: + data = f.read(HuffmanCodedPosAndEval3.itemsize) + if len(data) == 0: + break + hcpe = np.frombuffer(data, HuffmanCodedPosAndEval3, 1)[0] + board.set_hcp(hcpe['hcp']) + assert board.is_ok() + move_num = hcpe['moveNum'] + result = hcpe['result'] + + if p >= start: + csa.info(board, comments=[f"moveNum={move_num},result={result},opponent={hcpe['opponent']}"]) + + for i in range(move_num): + move_info = np.frombuffer(f.read(MoveInfo.itemsize), MoveInfo, 1)[0] + candidate_num = move_info['candidateNum'] + move_visits = np.frombuffer(f.read(MoveVisits.itemsize * candidate_num), MoveVisits, candidate_num) + move = board.move_from_move16(move_info['selectedMove16']) + if p >= start: + if candidate_num > 0: + if args.aoba: + if args.out_v: + v = 1.0 / (1.0 + math.exp(-move_info['eval'] * 0.0013226)) + comment = f"v={v:.3f}," + else: + comment = '' + comment += f"{move_visits['visitNum'].sum()}" + if args.sort_visits: + move_visits = np.sort(move_visits, order='visitNum')[::-1] + for move16, visit_num in zip(move_visits['move16'], move_visits['visitNum']): + comment += ',' + move_to_csa(board.move_from_move16(move16)) + ',' + str(visit_num) + else: + comment = '** ' + str(move_info['eval'] * (1 - board.turn * 2)) + else: + if args.aoba or move_info['eval'] == 0: + comment = None + else: + comment = '** ' + str(move_info['eval'] * (1 - board.turn * 2)) + csa.move(move, comment=comment, sep=sep) + board.push(move) + assert board.is_ok() + if p >= start: + csa.endgame(ENDGAME_SYMBOLS[hcpe['result']]) + p += 1 diff --git a/learn/convert/readme.txt b/learn/convert/readme.txt new file mode 100644 index 0000000..e2524db --- /dev/null +++ b/learn/convert/readme.txt @@ -0,0 +1,4 @@ +https://tadaoyamaoka.hatenablog.com/entry/2021/05/06/223701 + +python3 ./hcpe3_to_csa.py --aoba --out_v --sort_visits selfplay_gct-051.hcpe3 dummy.csa >> selfplay_gct-051.csa + From 9618256f715250d3ef1a62d6f3a2282dc4e0f9d1 Mon Sep 17 00:00:00 2001 From: yssaya Date: Sun, 21 Aug 2022 01:16:51 +0900 Subject: [PATCH 2/7] modified hcpe3_to_csa.py --- learn/convert/hcpe3_to_csa.py | 36 ++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/learn/convert/hcpe3_to_csa.py b/learn/convert/hcpe3_to_csa.py index 6c0ea42..2284acd 100644 --- a/learn/convert/hcpe3_to_csa.py +++ b/learn/convert/hcpe3_to_csa.py @@ -36,6 +36,7 @@ parser.add_argument('hcpe3') parser.add_argument('csa') parser.add_argument('--range') +parser.add_argument('--nyugyoku', action='store_true') parser.add_argument('--aoba', action='store_true') parser.add_argument('--out_v', action='store_true') parser.add_argument('--sort_visits', action='store_true') @@ -68,6 +69,8 @@ board = Board() csa = CSA.Exporter(args.csa) +v_pos_sum = 0 +move_sum = 0 p = 0 while p < end: data = f.read(HuffmanCodedPosAndEval3.itemsize) @@ -78,17 +81,20 @@ assert board.is_ok() move_num = hcpe['moveNum'] result = hcpe['result'] - - if p >= start: - csa.info(board, comments=[f"moveNum={move_num},result={result},opponent={hcpe['opponent']}"]) - + move_sum += move_num + need_output = p >= start and (not args.nyugyoku or result & 8 != 0) + if need_output: +# csa.info(board, comments=[f"moveNum={move_num},result={result},opponent={hcpe['opponent']}"]) + print ("'move_num=" +str(move_num) +",result=" +str(result)+ ",opponent=" + str(hcpe['opponent'])) + print (board) for i in range(move_num): move_info = np.frombuffer(f.read(MoveInfo.itemsize), MoveInfo, 1)[0] candidate_num = move_info['candidateNum'] move_visits = np.frombuffer(f.read(MoveVisits.itemsize * candidate_num), MoveVisits, candidate_num) move = board.move_from_move16(move_info['selectedMove16']) - if p >= start: + if need_output: if candidate_num > 0: + v_pos_sum += 1 if args.aoba: if args.out_v: v = 1.0 / (1.0 + math.exp(-move_info['eval'] * 0.0013226)) @@ -104,12 +110,24 @@ comment = '** ' + str(move_info['eval'] * (1 - board.turn * 2)) else: if args.aoba or move_info['eval'] == 0: - comment = None +# comment = None + comment = "" else: comment = '** ' + str(move_info['eval'] * (1 - board.turn * 2)) - csa.move(move, comment=comment, sep=sep) +# csa.move(move, comment=comment, sep=sep) +# csa.move(move, sep=sep) + if (board.turn) == 0: + sen = "+" + else: + sen = "-" + print (sen + move_to_csa(move) + ",'" + comment) board.push(move) assert board.is_ok() - if p >= start: - csa.endgame(ENDGAME_SYMBOLS[hcpe['result']]) + if need_output: + print (ENDGAME_SYMBOLS[hcpe['result']]) + print ("/") +# csa.endgame(ENDGAME_SYMBOLS[hcpe['result']]) p += 1 +print (p,move_sum,v_pos_sum) + + From 29f700cfef494b1065c9fd2077e35a818a35e0bc Mon Sep 17 00:00:00 2001 From: yssaya Date: Thu, 10 Nov 2022 17:49:11 +0900 Subject: [PATCH 3/7] test for calling other usi engine to re-ordering its best move. --- learn/convert/hcpe_to_csa.py | 59 ++++++ learn/convert/readme.txt | 87 ++++++++ src/usi-engine/Makefile | 2 +- src/usi-engine/bona/pipe.cpp | 338 ++++++++++++++++++++++++++++++++ src/usi-engine/bona/proce.cpp | 1 + src/usi-engine/bona/shogi.h | 3 +- src/usi-engine/bona/yss_dcnn.h | 5 + src/usi-engine/bona/yss_net.cpp | 4 + src/usi-engine/bona/ysszero.cpp | 14 ++ 9 files changed, 511 insertions(+), 2 deletions(-) create mode 100644 learn/convert/hcpe_to_csa.py create mode 100644 src/usi-engine/bona/pipe.cpp diff --git a/learn/convert/hcpe_to_csa.py b/learn/convert/hcpe_to_csa.py new file mode 100644 index 0000000..fa3a2dc --- /dev/null +++ b/learn/convert/hcpe_to_csa.py @@ -0,0 +1,59 @@ +from cshogi import * +import numpy as np +import math + +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument('hcpe') +#parser.add_argument('psv') +args = parser.parse_args() + +hcpes = np.fromfile(args.hcpe, dtype=HuffmanCodedPosAndEval) +#psvs = np.zeros(len(hcpes), PackedSfenValue) +move_sum = 0 +res = [0,0,0] +s_max = -999999 +s_min = +999999 +board = Board() +for hcpe in hcpes: +#for hcpe in zip(hcpes): + board.set_hcp(hcpe['hcp']) +# board.to_psfen(psv['sfen']) + score = hcpe['eval'] + move16 = hcpe['bestMove16'] + move_csa = move_to_csa(board.move_from_move16(move16)) + #move_num = hcpe['moveNum'] + result = hcpe['gameResult'] + # gameResult -> 0: DRAW, 1: BLACK_WIN, 2: WHITE_WIN + #if board.turn == gameResult - 1: + # psv['game_result'] = 1 + #elif board.turn == 2 - gameResult: + # psv['game_result'] = -1 + move_sum += 1 + res[result] += 1 + if score > s_max: + s_max = score + if score < s_min: + s_min = score + print ("'move_sum=" +str(move_sum) +",result=" +str(result) + ",move=" + move_csa + ",score=" + str(score)) + print (board) + + if (board.turn) == 0: + sen = "+" + else: + sen = "-" + v = 1.0 / (1.0 + math.exp(-score * 0.0013226)) + comment = f"v={v:.3f}," + print (sen + move_csa + "," + comment) + s_res = "%SENNICHITE" + if result == 1: + s_res = "%-ILLEGAL_ACTION" + elif result == 2: + s_res = "%+ILLEGAL_ACTION" + print (s_res) + print ("/") + +#psvs.tofile(args.psv) +print ("'move_sum=" +str(move_sum) +",result=" +str(result) + ",move=" + move_csa + ",score=" + str(score)) +print ("'res[0]=" +str(res[0]) +",[1]=" +str(res[1]) + ",[2]=" + str(res[2]) + ",s_max="+str(s_max)+",s_min"+str(s_min)) diff --git a/learn/convert/readme.txt b/learn/convert/readme.txt index e2524db..42ba3ec 100644 --- a/learn/convert/readme.txt +++ b/learn/convert/readme.txt @@ -1,4 +1,91 @@ +GCT�̊w�K�Ɏg�p�����f�[�^�Z�b�g�����J https://tadaoyamaoka.hatenablog.com/entry/2021/05/06/223701 python3 ./hcpe3_to_csa.py --aoba --out_v --sort_visits selfplay_gct-051.hcpe3 dummy.csa >> selfplay_gct-051.csa +����AI�̎����m�[�g�F����̕��z���w�K ���̂Q +https://tadaoyamaoka.hatenablog.com/entry/2021/03/16/235251 +���t�ǖʃt�H�[�}�b�g(hcpe) +�ǖʒP�ʂɏo�͂��āA�s�v�ȋǖʁi���@�肪1�肵���Ȃ��ǖʁj�́A�o�͂��Ă��Ȃ������B +�܂��A�΋ǂ̋��ڂ����m�ɂ킩��Ȃ� + + +hcpe_to_csa.py �͈ȉ��̃f�B���N�g���ɒu���Ď��s + +$ python3 /home/yss/shogi/dlshogi_dr2_exhi/dlshogi/utils/hcpe_to_csa.py dlshogi_with_gct-024.hcpe > dlshogi_with_gct-024.csa + +��菟���A�̋ǖʂ� +%-ILLEGAL_ACTION (��肪��������w�����̂Ő�菟��) +��菟���A�̋ǖʂ� +%+ILLEGAL_ACTION (��肪��������w�����̂Ō�菟��) +�𗘗p�B + + + + + + + +selfplay_gct-001.hcpe3 +308133 ���� 3638986140 byte�ŁB +308133 * 75 = 23,109,975 +2300���������x�B����75����x�ŁA15���ǖʁB + + + + +dlshogi_with_gct-001.hcpe , 356250760 byte +'move_sum=9375020,'res[0]=215305,[1]=4828653,[2]=4331062,s_max=30000,s_min-30000 + +339MB�� 937���ǖʁA���łɃV���b�t������Ă���B�����ɖ߂��͍̂���B +024 �܂�24�‚� +9375020 * 24 = 225,000,480 2���ǖ� + +339 MB *24 = 8GB + +hcpe�ɂ͋ǖ�(���)�A���̎�A�]���l(-30000 <= v <= +30000),�΋nj���(0...���������A1...��菟���A2...��菟��) +��4�‚̂݁B�萔�͂Ȃ��B +���� +4828653�� +4331062�s + 215305�� +------------- +(4828653 + 215305/2) / 9375020 = 0.5265 ��菟��0.526�ł���قǍ����Ȃ��B + + +suisho3kai-001.csa , 55417414 byte / 1458353 �ǖ� = 38 byte/�ǖʁB�Ֆʂ�32byte���B +842825�� +615528�s + 0�� +------------- +1458353 �ǖ�, 0.5779 ��菟����0.57�ō����B �]���l(-31996 <= v <= +31997)�B�͈͂��Ⴄ�ȁB +24�‚� 1458353 * 24 = 35,000,472 �ǖ� + +floodgate_2019-2021_r3500-001.hcpe , 11078976 byte +155969 �� +125196 �s + 10387 �� +----------- +291552 �ǖ� 0.5527 �]���l(-32767 <= v <= +32767) �B������������ɔ͈͂��Ⴄ +24�‚� 291552 * 24 = 6,997,248 �ǖ� + + +���v +225,000,480 dlshogi_with_gct + 35,000,472 suisho3kai + 6,997,248 floodgate_2019-2021_r3500 +-------------------- +266,998,200 2��6�疜�ǖʁB2021�N5��6���ł͖�6���B + +1�ǖ�100byte�Ƃ���25GB�B + +CSA�ɕϊ����ēǂݍ��񂾌�A +266998238 �ǖʁB��⑽���B + 76.9 * 128GB = 98GB���g�p�B + + +139910027 ��菟�� +121668247 ��菟�� + 5419964 �������� +---------------------- +266998238 ��菟�� (0.534) diff --git a/src/usi-engine/Makefile b/src/usi-engine/Makefile index 43d9bf7..49468da 100644 --- a/src/usi-engine/Makefile +++ b/src/usi-engine/Makefile @@ -71,7 +71,7 @@ bases = Network Leela Utils Zobrist GTP Random SMP OpenCL OpenCLScheduler NNCac ifneq ($(CPU_ONLY), 1) bases += iobase xzi err shogibase osi nnet nnet-cpu nnet-ocl nnet-srv opencli nnet-ipc option endif -bases += $(addprefix bona/, data main io proce utility ini attack book makemove unmake time csa valid bitop iterate searchr search quiesrch evaluate swap hash root next movgenex genevasn gencap gennocap gendrop mate1ply rand learn1 learn2 evaldiff problem ponder thread sckt debug mate3 genchk phash dfpn dfpnhash ysszero yss_net) +bases += $(addprefix bona/, data main io proce utility ini attack book makemove unmake time csa valid bitop iterate searchr search quiesrch evaluate swap hash root next movgenex genevasn gencap gennocap gendrop mate1ply rand learn1 learn2 evaldiff problem ponder thread sckt debug mate3 genchk phash dfpn dfpnhash ysszero yss_net pipe) sources = $(addsuffix .cpp, $(bases)) objects = $(addsuffix .o, $(bases)) diff --git a/src/usi-engine/bona/pipe.cpp b/src/usi-engine/bona/pipe.cpp new file mode 100644 index 0000000..86de232 --- /dev/null +++ b/src/usi-engine/bona/pipe.cpp @@ -0,0 +1,338 @@ +// 2022 Team AobaZero +// This source code is in the public domain. +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include "shogi.h" + +#include "yss_var.h" +#include "yss_dcnn.h" +#include "param.hpp" + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +const int CHILD_MAX = 1; +int pid_child[CHILD_MAX]; + +int pfd_a[CHILD_MAX][2]; +int pfd_b[CHILD_MAX][2]; +FILE *to_engine_stream[CHILD_MAX], *from_engine_stream[CHILD_MAX]; + +const int USI_MAX_LINES = 100; +const int USI_BUF_SIZE = 128*16; +char usi_commnad_line[USI_BUF_SIZE]; +char usi_return_latest_line[CHILD_MAX][USI_BUF_SIZE]; +char usi_return_line[CHILD_MAX][USI_MAX_LINES][USI_BUF_SIZE]; +int usi_return_line_num[CHILD_MAX]; + +void kill_usi_child() { + int sig = SIGTERM; //SIGKILL��AobaZero���E���Ƌ��L���������������Ȃ� + int i; + for (i=0; i%s", sSend); + + fprintf(to_engine_stream[n], "%s", sSend); + fflush(to_engine_stream[n]); + usi_return_line_num[n] = 0; + usi_return_latest_line[n][0] = 0; + if ( strlen(wait)==0 ) return; + + for (;;) { + char usi_line[USI_BUF_SIZE]; + if ( fgets(usi_line, USI_BUF_SIZE, from_engine_stream[n]) == NULL ) break; + PRT("<-%s",usi_line); + strcpy(usi_return_line[n][usi_return_line_num[n]], usi_line); + usi_return_line_num[n]++; + if ( usi_return_line_num[n] >= USI_MAX_LINES ) error("usi lines over"); + strcpy(usi_return_latest_line[n], usi_line); + if ( strstr(usi_line, wait) ) break; + } +} + +#define ENGINE_DIR "/home/yss/prg/Kristallweizen" +//#define ENGINE_DIR "/home/yss/shogi/suisho5" + +void run_usi_engine() +{ + setbuf(stdout, NULL); + setbuf(stderr, NULL); + + const char *sRun[CHILD_MAX][5] = { +// { ENGINE_DIR "/yane750zen2","","","","" }, + { ENGINE_DIR "/yane483_nnue_avx2","","","","" }, + }; + + int i; + for (i=0; i= 0x09 ) nf = 0x08; + if ( nf ) s += "+"; + int kk = abs(k) & 0x07; + if ( kk == 0 ) kk = abs(k); + sprintf(buf,"%c",usi_koma[kk&0x0f]+32*(k<0)); + s += buf; + } + if ( y!=8 ) s += "/"; + } + + int moves = ptree->nrep + ply - 1; + + const char ct[2] = { 'b','w' }; + sprintf(buf, " %c ",ct[sideToMove]); +// sprintf(buf, " %c ",ct[moves&1]); + s += buf; + int i,sum = 0; + for (i=1;i<=7;i++) { + int n = get_motigoma(i, HAND_B); + sum += n; + if ( n==0 ) continue; + if ( n > 1 ) { sprintf(buf,"%d",n); s += buf; } + s += usi_koma[i]; + } + for (i=1;i<=7;i++) { + int n = get_motigoma(i, HAND_W); + sum += n; + if ( n==0 ) continue; + if ( n > 1 ) { sprintf(buf,"%d",n); s += buf; } + s += (char)(usi_koma[i]+32); + } + if ( sum==0 ) s += "-"; + sprintf(buf," %d\n",moves+1); // ���Ɏw���萔 + s += buf; + return s; +} +/* +int usi_str_z(char *str) +{ + int x = str[0] - '1' + 1; + int y = str[1] - 'a' + 1; + int z = (y<<4) + (10 - x); + return z; +} + +char *str_usi_move(int bz,int az,int tk,int nf) +{ + static char str[10]; + memset(str,0,sizeof(str)); + if ( bz==0xff ) { + sprintf(str,"%c*",usi_koma[tk&0x0f]); + } else { + sprintf(str,"%s", usi_z_str(bz)); + } + strcat(str,usi_z_str(az)); + if ( nf ) strcat(str,"+"); + return str; +} + +// �ړ��O���ړ���A�̂ݐ��������� +void shogi::getCsaStr(char *str, int bz,int az,int tk,int nf, int SenteTurn) +{ + char turn[2] = { '-','+' }; + int k,x,bb,aa; + if ( bz == 0xff ) { + bb = 0x00; + k = tk & 0x07; + } else { + k = init_ban[bz] & 0x0f; + if ( k==0 ) { + k = init_ban[az] & 0x0f; + } else { + if ( nf ) k += 8; + } + x = 10 - (bz & 0x0f); + bb = (bz>>4) + (x<<4); + } + x = 10 - (az & 0x0f); + aa = (az>>4) + (x<<4); + sprintf(str,"%c%02X%02X%s",turn[SenteTurn],bb,aa,koma[k]); +} + +void usistr_to_move(tree_t * restrict ptree, char *str, int *p_bz, int *p_az, int *p_tk, int *p_nf, int fSenteTurn) +{ + // position startpos moves 7g7f 3c3d 2g2f+ G*6i + int nLen = strlen_int(str); + if ( nLen < 4 || nLen > 5 ) { DEBUG_PRT("position move Err nLen=%d,%s\n",nLen,str); } + int bz,az,tk=0,nf=0; + if ( str[1] == '*' ) { + bz = 0xff; + for (tk=0;;tk++) { + if ( usi_koma[tk] == str[0] ) break; + if ( usi_koma[tk] == 0 ) { DEBUG_PRT("not found drop piece\n"); } + } + if ( ! fSenteTurn ) tk |= 0x80; + } else { + bz = usi_str_z(&str[0]); + } + az = usi_str_z(&str[2]); + if ( str[4] == '+' ) nf = 0x08; + if ( bz != 0xff ) tk = init_ban[az]; + + if ( 0 ) { // CSA file output + char tmp[TMP_BUF_LEN]; + getCsaStr(tmp, bz,az,tk,nf, fSenteTurn); + PRT("%s",tmp); + if ( ((tesuu+1)%8)==0 ) PRT("\n"); else PRT(","); + } +// PRT("%x,%x,%x,%x\n",bz,az,tk,nf); + *p_bz = bz; *p_az = az; *p_tk = tk; *p_nf = nf; +} +*/ + +unsigned int get_best_move_alphabeta_usi(tree_t * restrict ptree, int sideToMove, int ply) +{ + // sfen�� + std::string s = get_sfen_string(ptree, sideToMove, ply); + PRT("%s",s.c_str()); + + static int fDone = 0; + if ( fDone==0 ) { + fDone = 1; + run_usi_engine(); + send_wait(0, "usi\n", "usiok"); + send_wait(0, "isready\n","readyok"); + send_wait(0, "setoption name BookMoves value 0\n",""); + send_wait(0, "setoption Threads value 1\n",""); + send_wait(0, "setoption NodesLimit value 500000\n",""); + } + send_wait(0, s.c_str(),""); + send_wait(0, "go\n","bestmove"); + PRT("%s",usi_return_latest_line[0]); + char *p = strstr(usi_return_latest_line[0],"bestmove"); + if ( p==NULL ) DEBUG_PRT(""); + char *str = p+9; + char *q = strchr(str,' '); + char *r = strchr(str,'\n'); + if ( r==NULL ) DEBUG_PRT(""); + if ( q == NULL ) *r = 0; + else *q = 0; + if ( strstr(str,"resign") ) { + return 0; + } +// int bz,az,tk,nf; +// usistr_to_move(ptree, str, &bz,&az,&tk,&nf, !sideToMove); + + char str_buf[7]; + unsigned int move; + if ( usi2csa( ptree, str, str_buf ) < 0 ) { DEBUG_PRT(""); } + PRT("csa:%s\n",str_buf); + int tmp_root_turn = root_turn; + root_turn = sideToMove; + if ( interpret_CSA_move( ptree, &move, str_buf ) < 0 ) { DEBUG_PRT(""); } + root_turn = tmp_root_turn; + + PRT("=%s=(%08x)\n",str,move); + return move; +} + diff --git a/src/usi-engine/bona/proce.cpp b/src/usi-engine/bona/proce.cpp index 3b44c70..ea4a7be 100644 --- a/src/usi-engine/bona/proce.cpp +++ b/src/usi-engine/bona/proce.cpp @@ -488,6 +488,7 @@ static int CONV proce_usi( tree_t * restrict ptree ) if ( ! strcmp( token, "position" ) ) { return usi_posi( ptree, &lasts ); } if ( ! strcmp( token, "quit" ) ) { stop_thread_submit(); + kill_usi_child(); return cmd_quit(); } if ( ! strcmp( token, "d" ) ) { diff --git a/src/usi-engine/bona/shogi.h b/src/usi-engine/bona/shogi.h index 5c35d7a..eb09c37 100644 --- a/src/usi-engine/bona/shogi.h +++ b/src/usi-engine/bona/shogi.h @@ -134,7 +134,8 @@ extern unsigned char ailast_one[512]; //#define BNZ_VER "34" // 20220429 dfpn time limit stop. //#define BNZ_VER "35" // 20220430 perpetual check bug fixed(again). //#define BNZ_VER "36" // 20220626 pawn ,rook, bishop are always promoted. discovered attack moves have 30% of best policy. safe LCB, kldgain 0.000005. -#define BNZ_VER "37" // 20220626 kldgain 000000075. ave playouts is 1568/move. +//#define BNZ_VER "37" // 20220626 kldgain 000000075. ave playouts is 1568/move. +#define BNZ_VER "38" // 20221110 test get_best_move_alphabeta_usi(). #define BNZ_NAME "AobaZero" //#define BNZ_VER "16" // 20210528 komaochi, mate3 diff --git a/src/usi-engine/bona/yss_dcnn.h b/src/usi-engine/bona/yss_dcnn.h index 1ac4729..f71454e 100644 --- a/src/usi-engine/bona/yss_dcnn.h +++ b/src/usi-engine/bona/yss_dcnn.h @@ -145,5 +145,10 @@ int count_square_attack(tree_t * restrict ptree, int sideToMove, int square ); void kiki_count_indirect(tree_t * restrict ptree, int kiki_count[][81], int kiki_bit[][2][81], bool fKikiBit); void update_HandicapRate(const char *token); void update_AverageWinrate(const char *token); +int get_motigoma(int m, int hand); + +// pipe.cpp +unsigned int get_best_move_alphabeta_usi(tree_t * restrict ptree, int sideToMove, int ply); +void kill_usi_child(); #endif //]] INCLUDE__GUARD diff --git a/src/usi-engine/bona/yss_net.cpp b/src/usi-engine/bona/yss_net.cpp index 52a090e..469b9f2 100644 --- a/src/usi-engine/bona/yss_net.cpp +++ b/src/usi-engine/bona/yss_net.cpp @@ -256,6 +256,7 @@ void on_terminate_aobaz() { // clean up shared memory try { OSI::MMap::cleanup(); } catch (std::exception &e) { std::cerr << e.what() << std::endl; } + kill_usi_child(); abort(); } @@ -308,6 +309,7 @@ void debug() { on_terminate_aobaz(); } #endif + kill_usi_child(); exit(0); } @@ -470,6 +472,7 @@ void set_dcnn_channels(tree_t * restrict ptree, int sideToMove, int ply, float * int loop; #ifdef USE_POLICY2187 const int T_STEP = 6; +// const int T_STEP = 1; #else const int T_STEP = 8; #endif @@ -559,6 +562,7 @@ void set_dcnn_channels(tree_t * restrict ptree, int sideToMove, int ply, float * } } +// if ( T_STEP == 1 ) base += (28 + 14 + 3) * (6 - 1); #ifdef USE_POLICY2187 /* nnet.cpp で入力特徴がfillとそれ以外で最適化されてるので、それに合わせる。最初の28が通常、残り17がfill。合計45。28+10=38が通常。1+6=7 がfill diff --git a/src/usi-engine/bona/ysszero.cpp b/src/usi-engine/bona/ysszero.cpp index 85ca27c..58b157d 100644 --- a/src/usi-engine/bona/ysszero.cpp +++ b/src/usi-engine/bona/ysszero.cpp @@ -1515,6 +1515,20 @@ if (0) { if ( pc->bias < b ) pc->bias = b; } } + + if ( 0 ) { +// print_board(ptree); + unsigned int best_usi = get_best_move_alphabeta_usi( ptree, sideToMove, ply); + for (i = 0; i < phg->child_num; i++) { + CHILD *pc = &phg->child[i]; + if ( pc->move != (int)best_usi ) continue; + PRT("found best_usi:ply=%2d,col=%d:%3d:%s,bias=%.5f,max_bias=%.5f\n",ply,sideToMove,i, string_CSA_move(best_usi).c_str(),pc->bias,max_bias); + float b = max_bias / 3.0; + if ( pc->bias < b ) pc->bias = b; + break; + } + } + } if ( 0 ) { // 1手の静止探索を From 8a33668bd1285b27c5a0e8913402b90c60efa38c Mon Sep 17 00:00:00 2001 From: yssaya Date: Thu, 15 Dec 2022 14:24:27 +0900 Subject: [PATCH 4/7] fSkipOneReply --- src/usi-engine/bona/ysszero.cpp | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/src/usi-engine/bona/ysszero.cpp b/src/usi-engine/bona/ysszero.cpp index 58b157d..77ce1ae 100644 --- a/src/usi-engine/bona/ysszero.cpp +++ b/src/usi-engine/bona/ysszero.cpp @@ -50,7 +50,8 @@ bool fLCB = true; double MinimumKLDGainPerNode = 0; //0.000002; 0で無効, lc0は 0.000005 bool fResetRootVisit = false; bool fDiffRootVisit = false; - +bool fSkipOneReply = false; // 王手を逃げる手が1手の局面は評価せずに木を降りる + int nLimitUctLoop = 100; double dLimitSec = 0; int nDrawMove = 0; // 引き分けになる手数。0でなし。floodgateは256, 選手権は321 @@ -1386,6 +1387,11 @@ if (0) { } phg->child_num = move_num; + if ( fSkipOneReply ) { + static int count, all; all++; + if ( move_num==1 ) PRT("move_num=1,ply=%d,%d/%d\n",ply,++count,all); + } + if ( NOT_USE_NN ) { // softmax const float temperature = 1.0f; @@ -1414,7 +1420,11 @@ if (0) { // { static double va[2]; static int count[2]; va[sideToMove] += v; count[sideToMove]++; PRT("va[]=%10f,%10f\n",va[0]/(count[0]+1),va[1]/(count[1]+1)); } // PRT("f=%10f,tanh()=%10f\n",f,v); } else { - if ( move_num == 0 ) { + if ( fSkipOneReply && move_num == 1 ) { + v = 0; + CHILD *pc = &phg->child[0]; + pc->bias = 1.0; + } else if ( move_num == 0 ) { // get_network_policy_value() は常に先手が勝で(+1)、先手が負けで(-1)を返す。sideToMove は無関係 v = -1; if ( sideToMove==white ) v = +1; // 後手番で可能手がないなら先手の勝 @@ -1944,6 +1954,7 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply, int *pExactVal // static int count; PRT("has come already? ply=%d,%d\n",ply,++count); //debug(); // 手順前後? 複数スレッドの場合 if ( force_do_playout == 0 ) down_tree = 1; } + if ( fSkipOneReply && phg2->child_num == 1 ) down_tree = 1; win = -phg2->net_value; UnLock(phg2->entry_lock); From dfbfaa30741f275291c2b3943eb991e5ba04fbad Mon Sep 17 00:00:00 2001 From: yssaya Date: Thu, 15 Dec 2022 16:26:24 +0900 Subject: [PATCH 5/7] fSkipKingCheck --- src/usi-engine/bona/ysszero.cpp | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/usi-engine/bona/ysszero.cpp b/src/usi-engine/bona/ysszero.cpp index 77ce1ae..7cfaf91 100644 --- a/src/usi-engine/bona/ysszero.cpp +++ b/src/usi-engine/bona/ysszero.cpp @@ -50,7 +50,8 @@ bool fLCB = true; double MinimumKLDGainPerNode = 0; //0.000002; 0で無効, lc0は 0.000005 bool fResetRootVisit = false; bool fDiffRootVisit = false; -bool fSkipOneReply = false; // 王手を逃げる手が1手の局面は評価せずに木を降りる +bool fSkipOneReply = false; // 王手を逃げる手が1手の局面は評価せずに木を降りる +bool fSkipKingCheck = false; // 王手がかかってる局面では評価せずに木を降りる int nLimitUctLoop = 100; double dLimitSec = 0; @@ -1428,6 +1429,8 @@ if (0) { // get_network_policy_value() は常に先手が勝で(+1)、先手が負けで(-1)を返す。sideToMove は無関係 v = -1; if ( sideToMove==white ) v = +1; // 後手番で可能手がないなら先手の勝 + } else if ( fSkipKingCheck && InCheck(sideToMove) ) { + v = 0; } else { v = get_network_policy_value(ptree, sideToMove, ply, phg); @@ -1606,7 +1609,7 @@ if (0) { phg->deleted = 0; // if ( ! is_main_thread(ptree) && ply==3 ) { PRT("create_node(),ply=%2d,c=%3d,v=%.5f,seqhash=%" PRIx64 "\n",ply,move_num,v,ptree->sequence_hash); print_board(ptree); } -//PRT("create_node done...ply=%d,sideToMove=%d,games_sum=%d,child_num=%d,slot=%d\n",ply,sideToMove,phg->games_sum,phg->child_num, ptree->tlp_slot); +//PRT("create_node done...ply=%d,sideToMove=%d,games_sum=%d,child_num=%d,slot=%d,v=%5.2f\n",ply,sideToMove,phg->games_sum,phg->child_num, ptree->tlp_slot,v); if ( fOpeningHash ) { } else { @@ -1954,12 +1957,20 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply, int *pExactVal // static int count; PRT("has come already? ply=%d,%d\n",ply,++count); //debug(); // 手順前後? 複数スレッドの場合 if ( force_do_playout == 0 ) down_tree = 1; } - if ( fSkipOneReply && phg2->child_num == 1 ) down_tree = 1; + if ( fSkipOneReply && phg2->child_num == 1 ) { +// PRT("down_tree:ply=%d\n",ply); + down_tree = 1; + } + if ( fSkipKingCheck && now_in_check ) { + down_tree = 1; + } + win = -phg2->net_value; UnLock(phg2->entry_lock); Lock(phg->entry_lock); } +// PRT("down_tree=%d,do_playout=%d,ply=%d\n",down_tree,do_playout,ply); if ( down_tree ) { // down tree const int fVirtualLoss = 1; @@ -1979,6 +1990,7 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply, int *pExactVal win = -uct_tree(ptree, Flip(sideToMove), ply+1, &ex_value); Lock(phg->entry_lock); +// PRT("down_tree:ply=%d:child_num=%3d,win=%5.2f\n",ply,phg->child_num,win); if ( fVirtualLoss ) { #ifdef USE_LCB pc->acc_virtual_loss -= VL_N; From 3defa5ffff4b11e8e38cf9e85288d2778599f34f Mon Sep 17 00:00:00 2001 From: yssaya Date: Wed, 21 Dec 2022 15:03:06 +0900 Subject: [PATCH 6/7] raw value and policy are recorded in *.csa. Dynamic Variance-Scaled cPUCT. NN is not called for one reply king escape position. --- learn/yss.cpp | 13 +- learn/yss.h | 6 +- learn/yss_dcnn.cpp | 459 ++++++++++++++++++++++++-------- learn/yss_dcnn.h | 5 + learn/yss_misc.cpp | 8 +- src/autousi/play.cpp | 28 +- src/common/param.hpp | 4 +- src/playshogi/playshogi.cpp | 15 +- src/server/datakeep.cpp | 12 +- src/usi-engine/bona/shogi.h | 3 +- src/usi-engine/bona/ysszero.cpp | 106 ++++++-- 11 files changed, 504 insertions(+), 155 deletions(-) diff --git a/learn/yss.cpp b/learn/yss.cpp index da858b4..598b605 100644 --- a/learn/yss.cpp +++ b/learn/yss.cpp @@ -938,6 +938,15 @@ int shogi::LoadCSA() ZERO_DB *pz = &zdb_one; pz->handicap = get_handicap_from_board(); if ( pz->handicap && fGotekara==0 ) DEBUG_PRT("pz->handiacp=%d\n",pz->handicap); +#if (GCT_SELF==1) + if ( pz->handicap != 0 ) DEBUG_PRT(""); + for (int y=0;y<9;y++) for (int x=0;x<9;x++) { + pz->v_init_pos.push_back(init_ban[(y+1)*16+(x+1)]); + } + for (int i=0;i<7;i++) pz->v_init_pos.push_back(mo_m[i+1]); + for (int i=0;i<7;i++) pz->v_init_pos.push_back(mo_c[i+1]); + pz->v_init_pos.push_back(fGotekara); +#endif } // csa�����Υ����Ȥ������ @@ -1006,7 +1015,7 @@ int shogi::LoadCSA() b0 = b1 = 0; } else { if ( getMoveFromCsaStr(&bz, &az, &tk, &nf, str)==0 ) DEBUG_PRT(""); - int c = (tesuu+(pz->handicap!=0))&1; + int c = (tesuu + fGotekara)&1; if ( is_pseudo_legalYSS((Move)pack_te(bz,az,tk,nf), (Color)(c==1) ) == false ) { DEBUG_PRT("move Err %3d:%s\n",tesuu,str); } @@ -1167,7 +1176,7 @@ P-00AL pz->moves = tesuu; pz->result = ZD_DRAW; pz->result_type = RT_NONE; - int is_gote_turn = (tesuu + (pz->handicap!=0))& 1; + int is_gote_turn = (tesuu + fGotekara) & 1; if ( strstr(lpLine,"TORYO") ) { if ( is_gote_turn ) { diff --git a/learn/yss.h b/learn/yss.h index 5b3d3de..c8c159b 100644 --- a/learn/yss.h +++ b/learn/yss.h @@ -385,7 +385,8 @@ class shogi { void clear_init_ban(); void hirate_ban_init(int n);/*** ���̤������֤��᤹�ʶ�����դ��� ***/ int get_handicap_from_board(); - void ban_saikousei(void); // ���ߤλ��������̤ξ���(ban_init)�򸵤����̾��֤�ƹ������롣 + void ban_saikousei(); // ���ߤλ��������̤ξ���(ban_init)�򸵤����̾��֤�ƹ������롣 + void ban_saikousei_without_kiki(); int is_hirate_ban(); // ʿ������̤�Ƚ�ꤹ�� void hanten_with_hash_kifu(); @@ -440,7 +441,8 @@ class shogi { int wait_and_get_new_kif(int next_weight_n); int add_a_little_from_archive(); int make_www_samples(); - + void get_piece_num_diff(bool bGoteTurn, int d[]); + void sum_pwv(double z, bool bGoteTurn, double sumd[]); // fish��Ϣ diff --git a/learn/yss_dcnn.cpp b/learn/yss_dcnn.cpp index 37f24d6..f7892d2 100644 --- a/learn/yss_dcnn.cpp +++ b/learn/yss_dcnn.cpp @@ -1118,12 +1118,14 @@ void shogi::set_dcnn_channels(Color sideToMove, const int ply, float *p_data, in int x,y; int flip = (ply + (nHandicap!=0)) & 1; // ���λ��������Ҥä����֤� int current_t = ply; // �����̤μ���������õ������ - + if ( GCT_SELF ) flip = (ply + fGotekara) & 1; + if ( GCT_SELF && ply != 0 ) DEBUG_PRT(""); // for AI_book2, hcpe does not have ply. if ( flip && sideToMove != BLACK ) DEBUG_PRT(""); // move_hit_kif[], move_hit_hashcode[] �˴���+õ�������δ���ȥϥå����ͤ�����뤳�� int loop,back_num=0; const int T_STEP = 6; +// const int T_STEP = 1; for (loop=0; loop>4) + (x<<4); + if ( nf ) k += 8; + } + int x = 10 - (az & 0x0f); + a = (az>>4) + (x<<4); + if ( 0 ) { + if ( b ) b = 0xaa - b; + a = 0xaa - a; + } + PRT("%02X%02X%s\n",b,a,koma[k]); + + move_hit_hash(bz,az,tk,nf); + } + DEBUG_PRT("ply=%d,tesuu=%d,flip=%d\n",ply,tesuu,flip); + } + if ( DCNN_CHANNELS != base ) { DEBUG_PRT("Err. DCNN_CHANNELS != base %d\n",base); } } @@ -2707,29 +2754,42 @@ void free_zero_db_struct(ZERO_DB *p) std::vector().swap(p->v_playouts_sum); vector< vector >().swap(p->vv_move_visit); std::vector().swap(p->v_score_x10k); +#if ( GCT_SELF==1) + std::vector().swap(p->v_init_pos); +#endif + } -//const int ZERO_DB_SIZE = 500000; // 100000, 500000 -const int ZERO_DB_SIZE = 1000000; // 100000, 500000 +//const int ZERO_DB_SIZE = 267000000; // AI_book2 +//const int ZERO_DB_SIZE = 20000000; // gct001-075 +//const int ZERO_DB_SIZE = 1000000; // 100000, 500000 +const int ZERO_DB_SIZE = 2190000; // 100000, 500000 const int MAX_ZERO_MOVES = 513; // 512���ܤ��꤬�ؤ��Ƶͤ�Ǥʤ���С�513���ܤ���꤬�ؤ���̵���ǰ���ʬ���� ZERO_DB zdb_one; -ZERO_DB zdb[ZERO_DB_SIZE]; +ZERO_DB *zdb; +//ZERO_DB zdb[ZERO_DB_SIZE]; int *pZDBsum = NULL; unsigned char *pZDBmove; unsigned char *pZDBmaxmove; unsigned short *pZDBplayouts_sum; unsigned short *pZDBscore_x10k; -const int ZDB_POS_MAX = ZERO_DB_SIZE * 256; // 128 = average moves. +const int ZDB_POS_MAX = ZERO_DB_SIZE * 128; // 128 = average moves. 64 = gct001-075 +//const int ZDB_POS_MAX = ZERO_DB_SIZE * 1; // AI book2 int zdb_count = 0; -int zdb_count_start = 51420000;//51370000;//1000000;//48300000;//18000000;// 10000000;//11600000; //10300000; //9500000;//8500000; //7400000; //5220000; //3200000; //2100000; //390000;//130000;//460000;//29700000; //18200000;//23400000; //20300000; //18800000; //16400000; //10300000; //5200000; // 400�����褫���ɤ߹������4000000 -int zero_kif_pos_num = 0; +int zdb_count_start = 59020000;//58410000;//51000000;//1000000; //53920000; 52320000;48000000;1000000;48300000;18000000; 10000000;11600000; 10300000; 9500000;8500000; 7400000; //5220000; //3200000; //2100000; //390000;//130000;//460000;//29700000; //18200000;//23400000; //20300000; //18800000; //16400000; //10300000; //5200000; // 400�����褫���ɤ߹������4000000 +uint64_t zero_kif_pos_num = 0; int zero_kif_games = 0; -const int MINI_BATCH = 128; // aoba_zero.prototxt �� cross_entroy_scale ��Ʊ�����ѹ����뤳�ȡ�layer��name�����ѹ� +int zero_pos_over250; +const int MINI_BATCH = 256; // aoba_zero.prototxt �� cross_entroy_scale ��Ʊ�����ѹ����뤳�ȡ�layer��name�����ѹ� +//const int MINI_BATCH = 32; // aoba_zero.prototxt �� cross_entroy_scale ��Ʊ�����ѹ����뤳�ȡ�layer��name�����ѹ� const int ONE_SIZE = DCNN_CHANNELS*B_SIZE*B_SIZE; // 362*9*9; *4= 117288 *64 = 7506432, 7MB�ˤ�ʤ� mini_batch=64 +int nGCT_files; // 1�Ĥ� selfplay_gct-00*.csa �����äƤ����� +int gct_csa = 1; // �ե������ֹ� +int sum_gct_loads = 0; // 1�ե��������������ɤ߹������˲û������ -const int fReplayLearning = 0; // ���Ǥ˺��줿���褫��Window�򤺤餻�Ƴؽ������� +const int fReplayLearning = 1; // ���Ǥ˺��줿���褫��Window�򤺤餻�Ƴؽ������� const int fWwwSample = 0; // fReplayLearning ��Ʊ����1 @@ -2741,6 +2801,8 @@ void init_zero_kif_db() pZDBmaxmove = (unsigned char*) malloc( ZDB_POS_MAX * sizeof(char) ); pZDBplayouts_sum = (unsigned short*)malloc( ZDB_POS_MAX * sizeof(short) ); pZDBscore_x10k = (unsigned short*)malloc( ZDB_POS_MAX * sizeof(short) ); + zdb = (ZERO_DB*) malloc(ZERO_DB_SIZE * sizeof(ZERO_DB)); + if ( zdb == NULL ) DEBUG_PRT(""); memset(pZDBsum,0,ZERO_DB_SIZE * sizeof(int)); int i; @@ -2751,8 +2813,14 @@ void init_zero_kif_db() } // �Ǹ��Ĵ�٤�archive�Τߥ���å��夹�� +#if ( GCT_SELF==1) +//const int ONE_FILE_KIF_NUM = 350000; +const int ONE_FILE_KIF_NUM = 9375100; +#else +const int ONE_FILE_KIF_NUM = 10000; +#endif static char recent_arch_file[TMP_BUF_LEN]; -static uint64_t recent_arch_table[10000]; // n���ܤδ���γ��ϰ��� +static uint64_t recent_arch_table[ONE_FILE_KIF_NUM]; // n���ܤδ���γ��ϰ��� static char *p_recent_arch = NULL; static uint64_t recent_arch_size; @@ -2760,7 +2828,7 @@ const int USE_XZ_NONE = 0; const int USE_XZ_POOL_ONLY = 1; const int USE_XZ_BOTH = 2; -const int USE_XZ = USE_XZ_BOTH; // 1...pool�Τ� xz �ǡ�2...pool��archive�� xz �� +int USE_XZ = USE_XZ_BOTH; // 1...pool�Τ� xz �ǡ�2...pool��archive�� xz �� // archive��������ֹ�=n �δ������Ф���KifBuf[] �����롣®��̵�롣fp�Ǥ�100���ܰʹߤ��٤�����̵���� int find_kif_from_archive(int search_n) @@ -2774,16 +2842,37 @@ int find_kif_from_archive(int search_n) // char dir_arch[] = "/home/yss/tcp_backup/archive20201207/"; // char dir_arch[] = "/home/yss/prg/komaochi/archive/"; // char dir_arch[] = "/home/yss/koma_syn/archive/"; -// char dir_arch[] = "/home/yss/tcp_backup/archive20201207/"; - char dir_arch[] = "/home/yss/tcp_backup/archive/"; - + char dir_arch[] = "/home/yss/tcp_backup/archive20201207/"; int arch_n = (search_n/10000) * 10000; // 20001 -> 20000 + if ( GCT_SELF ) { + USE_XZ = USE_XZ_NONE; + if ( search_n - sum_gct_loads == nGCT_files && nGCT_files != 0 ) { + sum_gct_loads = zdb_count; + gct_csa++; + nGCT_files = 0; + PRT("next gct_csa=%d,zdb_count=%d\n",gct_csa,zdb_count); + } + if ( gct_csa == 24*3+1 ) { PRT("stop memory.\n"); return 0; } +// if ( gct_csa >= 2 ) { PRT("stop memory.\n"); return 0; } + } + char filename[TMP_BUF_LEN]; if ( USE_XZ==USE_XZ_BOTH ) { sprintf(filename,"%sarch%012d.csa.xz",dir_arch,arch_n); } else { sprintf(filename,"%sarch%012d.csa",dir_arch,arch_n); + if ( GCT_SELF ) { +// sprintf(filename,"/home/yss/prg/learn/selfplay_gct-%03d.csa",gct_csa); +// sprintf(filename,"/home/yss/shogi/dlshogi_hcpe/20210506/selfplay_gct-%03d.csa",gct_csa); + if ( gct_csa < 25 ) { + sprintf(filename,"/home/yss/shogi/dlshogi_hcpe/dlshogi_with_gct-%03d.csa",gct_csa); + } else if ( gct_csa < 49 ) { + sprintf(filename,"/home/yss/shogi/dlshogi_hcpe/suisho3kai-%03d.csa",gct_csa - 24); + } else { + sprintf(filename,"/home/yss/shogi/dlshogi_hcpe/floodgate_2019-2021_r3500-%03d.csa",gct_csa - 48); + } + } } // PRT("try open %s\n",filename); if ( strcmp(filename, recent_arch_file) != 0 ) { @@ -2796,7 +2885,7 @@ int find_kif_from_archive(int search_n) PRT("not found. %s\n",filename); return 0; } - PRT("%s, size=%d\n",filename,size); + PRT("%s, size=%lu\n",filename,size); if ( p_recent_arch ) free(p_recent_arch); p_recent_arch = (char *)malloc(size); if ( p_recent_arch==NULL ) { PRT("fail malloc size=%d\n",size); exit(0); } @@ -2812,7 +2901,7 @@ int find_kif_from_archive(int search_n) stat(filename, &st); size = st.st_size; - PRT("%s, size=%d\n",filename,size); + PRT("%s, size=%lu\n",filename,size); if ( p_recent_arch ) free(p_recent_arch); p_recent_arch = (char *)malloc(size); if ( p_recent_arch==NULL ) { PRT("fail malloc size=%d\n",size); exit(0); } @@ -2829,46 +2918,45 @@ int find_kif_from_archive(int search_n) if ( c!='\n' ) continue; lines++; if ( i= ONE_FILE_KIF_NUM ) DEBUG_PRT(""); g++; recent_arch_table[g] = i+2; } } strcpy(recent_arch_file, filename); recent_arch_size = size; - if ( g != 10000 - 1 ) PRT("Err g=%d\n",g); + nGCT_files = g; + if ( GCT_SELF == 0 && g != ONE_FILE_KIF_NUM - 1 ) PRT("Err g=%d\n",g); PRT("lines=%d,g=%d,%.3f sec\n",lines,g,get_spend_time(ct1)); // 100��36�á�1��0.36�� // for (i=0;i<10;i++) PRT("%d:%d\n",i,recent_arch_table[i]); exit(0); } int n = search_n - arch_n; - if ( n < 0 || n >= 10000 ) { PRT("err\n"); exit(0); } - + if ( GCT_SELF==0 && (n < 0 || n >= 10000) ) DEBUG_PRT("err\n"); + if ( GCT_SELF ) n = search_n - sum_gct_loads; + KifBuf[0] = 0; nKifBufNum = 0; nKifBufSize = 0; - uint64_t start_i = recent_arch_table[n]; - uint64_t next_i = recent_arch_size; - if ( n < 9999 ) next_i = recent_arch_table[n+1] - 1; // '/' ������ʤ� + uint64_t start_i = recent_arch_table[n ]; + uint64_t next_i = recent_arch_table[n+1] - 1; // '/' ������ʤ� + if ( GCT_SELF==0 && n >= 9999 ) next_i = recent_arch_size; uint64_t one_size = next_i - start_i; - if ( one_size > KIF_BUF_MAX-256 || one_size == 0 ) { - DEBUG_PRT("Err one csa kif is too big\n"); - } + if ( one_size > KIF_BUF_MAX-256 || one_size <= 0 ) DEBUG_PRT("Err one csa kif is too big, n=%d,%lu\n",n,one_size); strncpy(KifBuf, p_recent_arch+start_i, one_size); KifBuf[one_size] = 0; nKifBufSize = strlen(KifBuf); - if ( nKifBufSize == 0 ) { PRT("Err size=0, search_n=%d\n",search_n); exit(0); } + if ( nKifBufSize == 0 ) DEBUG_PRT("Err size=0, search_n=%d\n",search_n); // for (int i=0;KifBuf[i]!=0;i++) PRT("%c",KifBuf[i]); PRT("\n"); -// PRT("size=%d\n",nKifBufSize); +// PRT("size=%d,n=%d,one_size=%d,search_n=%d,nGCT_files=%d\n",nKifBufSize,n,one_size,search_n,nGCT_files); return 1; } int find_kif_from_pool(int search_n) { -// char dir_pool[] = "/home/yss/koma_syn/pool"; - char dir_pool[] = "/home/yss/tcp_backup/pool"; - + char dir_pool[] = "/home/yss/koma_syn/pool"; char filename[TMP_BUF_LEN]; if ( USE_XZ ) { sprintf(filename,"%s/no%012d.csa.xz",dir_pool,search_n); @@ -2935,6 +3023,10 @@ void shogi::add_one_kif_to_db() ZERO_DB *pdb = &zdb[zdb_count % ZERO_DB_SIZE]; // �Ť��ΤϾ�� ZERO_DB *p = &zdb_one; free_zero_db_struct(pdb); +#if (GCT_SELF==1) + if ( zdb_count >= ZERO_DB_SIZE ) DEBUG_PRT("ZERO_DB_SIZE is not enoght!\n"); + if ( p->moves <= 0 ) DEBUG_PRT("zdb_count=%d,p->moves=%d",zdb_count,p->moves); +#endif if ( 0 ) { // ��λ������������Υƥ��ȡ���λ�����ͽ�ۤ��ƺǸ�ο������ int t = get_guess_resign_moves(p->moves); int t2 = t & 0xfffe; // ������ @@ -2956,6 +3048,9 @@ void shogi::add_one_kif_to_db() pdb->handicap = p->handicap; // copy(p->v_kif.begin(), p->v_kif.end(), back_inserter(pdb->v_kif)); // copy(p->v_playouts_sum.begin(), p->v_playouts_sum.end(), back_inserter(pdb->v_playouts_sum)); +#if ( GCT_SELF==1) + pdb->v_init_pos = p->v_init_pos; +#endif pdb->v_kif = p->v_kif; pdb->v_playouts_sum = p->v_playouts_sum; pdb->vv_move_visit = p->vv_move_visit; @@ -2989,8 +3084,7 @@ int is_exist_kif_file(int search_n) int nHandicapLastID[HANDICAP_TYPE]; int nHandicapRate[HANDICAP_TYPE]; const char HANDICAP_ID_FILE[] = "handicap_rate.txt"; -//const char HANDICAP_SYN[] = "/home/yss/koma_syn/handicap/handicap.txt"; -const char HANDICAP_SYN[] = "/home/yss/tcp_backup/handicap/handicap.txt"; +const char HANDICAP_SYN[] = "/home/yss/koma_syn/handicap/handicap.txt"; void load_handicap_rate() { @@ -3110,7 +3204,7 @@ void update_pZDBsum() const int G1000 = 1000*H; const int WR_OK_GAMES = 8000; // ľ��Τ����жɿ��ξ�Ψ�ǥ졼�Ȥ���ư - load_handicap_rate(); + if ( GCT_SELF==0 ) load_handicap_rate(); const int MAX_GAMES = (WR_OK_GAMES*12/10)*H; int i; for (i=0;i ZERO_DB_SIZE ) loop = ZERO_DB_SIZE; for (i=0;imoves == 0 ) { PRT("Err. p->moves=0\n"); exit(0); } if ( p->result < 0 || p->result > 2 ) DEBUG_PRT(""); - if ( zero_kif_pos_num + p->moves >= ZDB_POS_MAX ) DEBUG_PRT("ZDB_POS_MAX!"); + if ( zero_kif_pos_num + p->moves >= ZDB_POS_MAX ) DEBUG_PRT("ZDB_POS_MAX! %d/%d,%lu,%d",i,loop,zero_kif_pos_num,p->moves); for (int j=0;jmoves;j++) { int n = zero_kif_pos_num + j; int m0 = j; @@ -3138,10 +3233,12 @@ void update_pZDBsum() if ( m1 > 255 ) m1 = 255; pZDBmove[n] = m0; pZDBmaxmove[n] = m1; - pZDBplayouts_sum[n] = p->v_playouts_sum[j]; + if ( GCT_SELF==0 ) pZDBplayouts_sum[n] = p->v_playouts_sum[j]; + if ( GCT_SELF==1 ) pZDBplayouts_sum[n] = 0; pZDBscore_x10k[n] = p->v_score_x10k[j]; + if ( m0 >= 250 ) zero_pos_over250++; } - +// PRT("%d/%d,%lu,",i,loop,zero_kif_pos_num); zero_kif_pos_num += p->moves; pZDBsum[i] = zero_kif_pos_num; zero_kif_games += (p->moves != 0); @@ -3246,8 +3343,8 @@ void update_pZDBsum() } // if ( fUpdate ) save_handicap_rate(); - float ave_winrate = (float)(res_total_sum[0][1] + res_total_sum[0][0]/2.0)/(res_total_sum[0][0]+res_total_sum[0][1]+res_total_sum[0][2]); - save_average_winrate(ave_winrate); +// float ave_winrate = (float)(res_total_sum[0][1] + res_total_sum[0][0]/2.0)/(res_total_sum[0][0]+res_total_sum[0][1]+res_total_sum[0][2]); +// save_average_winrate(ave_winrate); // PRT("H:"); for (i=0;i=0x0e ) m--; // m = 1...14 + if ( m>=0x08 ) m--; // m = 1...13 + if ( k < 0x80 ) { + d[m]++; + } else { + d[m]--; + } + } + for (i=1;i<8;i++) { + d[13+i] = mo_m[i] - mo_c[i]; + } + if ( bGoteTurn ) for (i=1;i 0 ) piece_w[i] -= add; + if ( piece_d_sum[i] < 0 ) piece_w[i] += add; +// piece_w[i] += -piece_d_sum[i]*add; // unstable + piece_d_sum[i] = 0; + PRT("%f(%f),",piece_w[i],piece_w[i]/fu); + } + PRT("\n"); + } + if ( (count%800)==0 ) { add /= 1.5; PRT("DIV:ADD=%.10f\n",add); } +} + +static uint64_t rand_try = 0; +static uint64_t rand_batch = 0; + #ifdef F2187 void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_policy, float *label_value, float label_policy_visit[][MOVE_2187_MAX]) #else @@ -3600,22 +3779,42 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po // pos_sum ���椫��64�ĥ���������� int *ri = new int[mini_batch]; + int *ri_moves = new int[mini_batch]; int i; for (i=0;i (n*9+30) ) { i--; continue; } +// if ( m <= 40 && (rand_m521() % 10) != 0 ) { i--; continue; } + if ( m <= 40 && (rand_m521() % 4) != 0 ) { i--; continue; } +// if ( n < 30 && (int)(rand_m521() % 300) > (n*9+30) ) { i--; continue; } + if ( n < 30 && (float)(rand_m521() % 3000) > exp(8.0*(30-n)/30) ) { i--; continue; } +// if ( n < 30 && (float)(rand_m521() % 1000) > pow(2.0,n) ) { i--; continue; } +// if ( n < 24 ) { i--; continue; } +// double x = 1.0 + abs(n-60) / 10.0;// from policy weight surprize. around 60 moves is most difficult. +// if ( x > 6 ) x = 6; +// double y = 0.003*x*x*x - 0.058*x*x + 0.141*x + 0.896; +// if ( (double)(rand_m521() % 1000) > y*1000 ) { i--; continue; } +// if ( n < 250 ) { i--; continue; } } if ( 1 ) { int s = pZDBplayouts_sum[r]; int x = pZDBscore_x10k[r]; if ( s < 50 && (x==0 || x==10000) ) { i--; continue; } +// if ( x < 1000 ) { i--; continue; } +// if ( x < 2000 && (rand_m521() % 10) != 0 ) { i--; continue; } +// if ( x < 2000 && (int)(rand_m521() % 200) > (x/10) ) { i--; continue; } +// if ( x < 1000 && (int)(rand_m521() % 100) > (x/10) ) { i--; continue; } } int j; @@ -3625,6 +3824,7 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po if ( j != i ) { i--; continue; } ri[i] = r; } + rand_batch += mini_batch; for (i=0;ihandicap, false); - + fGotekara = (p->handicap!=0); +#if ( GCT_SELF==1) + if ( GCT_SELF ) { + for (int y=0;y<9;y++) for (int x=0;x<9;x++) { + init_ban[(y+1)*16+(x+1)] = p->v_init_pos[y*9+x]; + } + for (int i=0;i<7;i++) mo_m[i+1] = p->v_init_pos[81+0+i]; + for (int i=0;i<7;i++) mo_c[i+1] = p->v_init_pos[81+7+i]; + fGotekara = p->v_init_pos[81+7*2]; + ban_saikousei_without_kiki(); + } +#endif int fSymmetry = rand_m521() % 2; // ����Ǻ���ȿž fSymmetry = 0; // ����ȿž�ʤ� +// if ( ri_moves[i] < 240 ) fSymmetry = 0; for (j=0;jv_kif[j]>>8, p->v_kif[j]&0xff, (j+(p->handicap!=0))&1, &bz, &az, &tk, &nf); + trans_4_to_2_KDB( p->v_kif[j]>>8, p->v_kif[j]&0xff, (j+fGotekara)&1, &bz, &az, &tk, &nf); - move_hit_hash(bz,az,tk,nf); + move_hit_hash(bz,az,tk,nf); // �����϶�μ��ऴ�Ȥ˺��ľ���Ƥ�Τ����� move_hit_kif[j] = pack_te( bz,az,tk,nf ); move_hit_hashcode[j][0] = hash_code1; // �ؤ�����ζ��̤Υϥå����ͤ������ move_hit_hashcode[j][1] = hash_code2; @@ -3667,14 +3879,15 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po if ( j >= p->moves || j!=t ) { DEBUG_PRT("no next move? t=%d(%d) err.j=%d,r=%d\n",t,p->moves,j,r); } if ( 1 ) { int s = p->v_playouts_sum[t]; - int x = p->v_score_x10k[t]; - if ( s < 50 && (x==0 || x==10000) ) DEBUG_PRT("r=%8d,t=%3d,s=%5d,x=%d\n",r,t,s,x); + int x = p->v_score_x10k[t]; + if ( s < 50 && (x==0 || x==10000) ) { static int count; PRT("r=%8d,t=%3d,s=%5d,x=%5d,count=%d\n",r,t,s,x,count++); } +// if ( s < 50 ) PRT("r=%8d,t=%3d,s=%5d,x=%d\n",r,t,s,x); } int bz,az,tk,nf; - bool bGoteTurn = (t+(p->handicap!=0)) & 1; + bool bGoteTurn = (t+fGotekara) & 1; trans_4_to_2_KDB( p->v_kif[j]>>8, p->v_kif[j]&0xff, bGoteTurn, &bz, &az, &tk, &nf); - + int win_r = 0; if ( p->result == ZD_S_WIN ) win_r = +1; if ( p->result == ZD_G_WIN ) win_r = -1; @@ -3695,9 +3908,15 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po // �ºݤξ��Ԥ�õ���ͤ�ʿ�Ѥ�ؽ���https://tadaoyamaoka.hatenablog.com/entry/2018/07/01/121411 float ave_r = ((float)win_r + score) / 2.0; +// float ave_r = ((float)win_r*0 + score*10) / 10.0; + +// float m = pZDBmaxmove[r]; +// float n = pZDBmove[r]; +// float b = n/m; +// float ave_r = (float)win_r*b + score*(1-b); if ( score_x10k == NO_ROOT_SCORE ) ave_r = win_r; - ave_r = win_r; // not use average -// PRT("(%.3f,%.3f)",(float)win_r,score); +// ave_r = win_r; // not use average +// PRT("(%6.3f,%6.3f,%6.3f,%3.0f/%3.0f,%.3f)",(float)win_r,score,ave_r,n,m,b); int playmove_id = 0; if ( f2187 ) { @@ -3720,7 +3939,9 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po for (k=0; kv_playouts_sum[j]; + int playout_sum = 1; + if ( p->v_playouts_sum.size() > 0 ) playout_sum = p->v_playouts_sum[j]; + if ( playout_sum <= 0 ) DEBUG_PRT(""); label_policy_visit[i][playmove_id] = 1.0f / (float)playout_sum; // ¸�ߤ��ʤ�����1��õ���������Ȥ��� int found = 0; @@ -3746,7 +3967,7 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po // PRT("r=%5d:b0=%3d,b1=%3d,[%3d][%3d] (%02x,%02x,%02x,%02x)id=%5d,v=%3d,%6.4f\n",r,b0,b1,j,k,bz,az,tk,nf,id,visit,label_policy_visit[i][id]); if ( id==playmove_id ) found = 1; } - if ( found==0 ) { static int count=0; if ( count++==0 ) PRT("no best move visit. id=%d\n",playmove_id); } + if ( found==0 ) { static int count=0; if ( count++<=10 ) { PRT("\nno best move visit. i=%d,id=%d,bi=%d,t=%d/%d,visit_size=%d\n",i,playmove_id,bi,t,p->moves,p->vv_move_visit[j].size()); hyouji(); } } } float *pd = (float *)data + ONE_SIZE * i; @@ -3764,6 +3985,8 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po sum_t +=t; sum_diff_win_r +=fabs(win_r - ave_r); + if ( PIECE_LEARN ) sum_pwv(win_r, bGoteTurn, piece_d_sum); + if ( fPW ) PRT("%3d ",p->weight_n); // if ( fPW ) PRT("%d(%3d) ",bi,p->weight_n); static int tc[MAX_ZERO_MOVES],tc_all; @@ -3777,37 +4000,29 @@ void shogi::prepare_kif_db(int fPW, int mini_batch, float *data, float *label_po ,sum_handicap[0],sum_handicap[1],sum_handicap[2],sum_handicap[3],sum_handicap[4],sum_handicap[5],sum_handicap[6] ,sum_result[0],sum_result[1],sum_result[2], sum_turn[0],sum_turn[1], sum_t/mini_batch, sum_diff_win_r/mini_batch ); // PRT("%.2f sec, mini_batch=%d,%8d,%8.1f,%6.3f\n",get_spend_time(ct1), mini_batch, ri[0], label_policy[0], label_value[0]); - + if ( PIECE_LEARN ) update_piece_w(); delete [] ri; + delete [] ri_moves; } void convert_caffemodel(int iteration, int weight_number) { print_time(); PRT(",convert_caffemodel. weight_number=%4d,zdb_count=%10d,iteration=%d\n",weight_number,zdb_count,iteration); - FILE *fp = fopen("/home/yss/test/extract/aoba.sh","w"); + FILE *fp = fopen("/home/yss/test/koma_ext/aoba.sh","w"); if ( fp==NULL ) DEBUG_PRT(""); fprintf(fp,"#!/bin/bash\n"); - fprintf(fp,"cd /home/yss/test/extract/\n"); - fprintf(fp,"export LD_LIBRARY_PATH=/home/yss/caffe_cpu/build/lib:\n"); - fprintf(fp,"export PYTHONPATH=/home/yss/caffe_cpu/python:$PYTHONPATH\n"); -// fprintf(fp,"python ep_del_bn_scale_factor_version_short_auto.py /home/yss/shogi/yssfish/snapshots/_iter_%d.caffemodel\n",iteration); - fprintf(fp,"python ep_del_bn_scale_factor_version_short_auto.py /home/yss/shogi/learn/snapshots/_iter_%d.caffemodel\n",iteration); -#if 0 - fprintf(fp,"hash=`sha256sum binary.txt | awk '{print $1}'`\n"); - fprintf(fp,"mv binary.txt ${hash}_w%012d.txt\n",weight_number); - fprintf(fp,"xz -z -k ${hash}_w%012d.txt\n",weight_number); - fprintf(fp,"mv ${hash}_w%012d.txt.xz ../../tcp_backup/weight/\n",weight_number); -#else + fprintf(fp,"cd /home/yss/test/koma_ext/\n"); + fprintf(fp,"export LD_LIBRARY_PATH=/home/yss/caffe_cpu/build/lib:/home/yss/cuda/cuda-11.2/lib64:\n"); + fprintf(fp,"python3 ep_short_auto_py3.py /home/yss/shogi/learn/snapshots/_iter_%d.caffemodel\n",iteration); fprintf(fp,"mv binary.txt w%012d.txt\n",weight_number); fprintf(fp,"xz -9 -z -k w%012d.txt\n",weight_number); - fprintf(fp,"mv w%012d.txt.xz ../../tcp_backup/weight/\n",weight_number); -#endif + fprintf(fp,"mv w%012d.txt.xz ../../koma_syn/weight/\n",weight_number); fclose(fp); - int ret = system("bash /home/yss/test/extract/aoba.sh"); + int ret = system("bash /home/yss/test/koma_ext/aoba.sh"); ret = system("sleep 10"); - ret = system("/home/yss/tcp_backup/rsync_weight_only.sh"); + ret = system("/home/yss/koma_syn/rsync_weight_only.sh"); (void)ret; } @@ -4016,7 +4231,6 @@ void start_zero_train(int *p_argc, char ***p_argv ) GlobalInit(p_argc, p_argv); PS->init_prepare_kif_db(); -//exit(0); if ( fWwwSample ) { PS->make_www_samples(); return; } // MemoryDataLayer�ϥ������ͤ���ϤǤ���DataLayer�� @@ -4031,7 +4245,7 @@ void start_zero_train(int *p_argc, char ***p_argv ) // Solver�������ƥ����ȥե����뤫���ɤ߹��� SolverParameter solver_param; if ( ITER_SIZE==1 ) { - ReadProtoFromTextFileOrDie("aoba_zero_solver.prototxt", &solver_param); + ReadProtoFromTextFileOrDie("aoba_solver.prototxt", &solver_param); } else if ( ITER_SIZE==64 ) { if ( MINI_BATCH!=64 ) DEBUG_PRT("MINI_BATCH err\n"); ReadProtoFromTextFileOrDie("aoba_zero_solver_mb64_is64.prototxt", &solver_param); @@ -4046,35 +4260,43 @@ void start_zero_train(int *p_argc, char ***p_argv ) //ɾ���ѤΥǡ�������� const auto net = solver->net(); -// const char sNet[] = "20190419replay_lr001_wd00002_100000_1018000/_iter_36000.caffemodel"; // w449 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/_iter_300376.caffemodel"; -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/_iter_3160000.caffemodel"; // w627 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20190817/_iter_1080000.caffemodel"; // w681 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20190907/_iter_540000.caffemodel"; // w708 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191001/_iter_580000.caffemodel"; // w737 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191002/_iter_20000.caffemodel"; // w738 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191010/_iter_220000.caffemodel"; // w749 -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191021/_iter_300000.caffemodel"; // w764 bug fix -// const char sNet[] = "/home/yss/shogi/yssfish/snapshots/20191029/_iter_200000.caffemodel"; // w774 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20191029/_iter_312.caffemodel"; // w775 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20191107/_iter_3432.caffemodel"; // w786 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200328/_iter_1370000.caffemodel"; // w923 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200708/_iter_5260000.caffemodel"; // w1449 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20200928/_iter_5970000.caffemodel"; // w2046 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201027/_iter_2440000.caffemodel"; // w2290 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201109/_iter_1520000.caffemodel"; // w2442 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201206/_iter_3070000.caffemodel"; // w2749 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20201228/_iter_2720000.caffemodel"; // w3021 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210111/_iter_1760000.caffemodel"; // w3076 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210131/_iter_2272000.caffemodel"; // w3147 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210311/_iter_4832000.caffemodel"; // w3298 -// const char sNet[] = "/home/yss/shogi/learn/40b_8x_39770000_games_iter_3870190.caffemodel"; // 40b, next = w3460 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210426/_iter_5152000.caffemodel"; // w3459 = w3703 -// const char sNet[] = "/home/yss/shogi/learn/snapshots/20211225/_iter_2112000.caffemodel"; // w3769 -// const char sNet[] = "/home/yss/shogi/learn/20220222_125600_256x20b_swish_no_ave_no_30_from_20220218_071436_iter_600000.caffemodel"; - const char sNet[] = "/home/yss/shogi/learn/snapshots/20220226/_iter_64000.caffemodel"; // w3883 - - int next_weight_number = 3884; // ���ߤκǿ����ֹ� +1 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210604/_iter_10000.caffemodel"; // w0001 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210607/_iter_60000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210610/_iter_90000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210615_lr0001/_iter_400000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210722_lr0001/_iter_1730000.caffemodel"; // w213 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210805_lr0001/_iter_1070000.caffemodel"; // w320 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210828_lr00001/_iter_2030000.caffemodel";// w523 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210922_kldgain/_iter_2380000.caffemodel";// w761 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20210922_kldgain/_iter_2210000.caffemodel";// w744 �����ᤷ +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20211002/_iter_1150000.caffemodel"; // w859 ������� +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20211014/_iter_950000.caffemodel"; // w954 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20211023/_iter_760000.caffemodel"; // w1030 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/20211118/_iter_1390000.caffemodel"; // w1169 +// const char sNet[] = "/home/yss/shogi/learn/20220112_191203_256x20b_swish/_iter_1976875.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220120_161939_256x20b_swish_from_20220112_191203/_iter_620000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220124_121016_256x20b_swish_from_20220120_161939/_iter_630000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220209_014645_256x20b_swish_no_ave_no_30_cos/_iter_1899620.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220216_205552_256x20b_swish_no_ave_no_30_cos_from_20220209_014645/_iter_230000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220218_071436_256x20b_swish_no_ave_no_30_from_20220216_205552/_iter_810000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220328_iter_1248000.caffemodel"; // w3922 +// const char sNet[] = "/home/yss/shogi/learn/snapshots/_iter_90000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220505_234024_256x20b_swish_ave_no_30_from_5392k_games_x3/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220509_125939_256x20b_ave_no_30_from_1000k_games_from_20220505_234024/_iter_782130.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220515_105749_256x20b_ave_no_30_from_53930k_games_from_20220509_125939/_iter_367135.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/w3969_iter_2752000.caffemodel"; // w3969 +// const char sNet[] = "/home/yss/shogi/learn/20220601_005103_256x20b_ave_exp_8_30_30_x_m40_4_loop_div4_from_51000k/_iter_600000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220810_014249_256x20b_ave_gct_001_025/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220813_134309_256x20b_ave_gct_026_050_from_20220810_014249/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220819_225132_256x20b_ave_gct_001_025_T1/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220823_120232_256x20b_ave_gct_001_025_T1_from_20220819_225132/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220831_152345_256x20b_ave_book_softmax/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220904_061307_256x20b_ave_book_softmax_from_20220831_152345/_iter_1600000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20220712_111426_256x20b_ave_exp_8_30_30_x_m40_4_loop_div4_from_51000k_20220601_005103/_iter_800000.caffemodel"; +// const char sNet[] = "/home/yss/shogi/learn/20221105_181114_256x20b_ave_exp_8_30_30_x_m40_4_cos_from_58410k_20220712_111426/_iter_480000.caffemodel"; + const char sNet[] = "/home/yss/shogi/learn/20221107_170923_ave_exp_8_30_30_x_m40_4_cos_from_58410k_20221105_181114/_iter_400000.caffemodel"; + + int next_weight_number = 1170; // ���ߤκǿ����ֹ� +1 net->CopyTrainedLayersFrom(sNet); // caffemodel���ɤ߹���dzؽ���Ƴ������� // load_aoba_txt_weight( net, "/home/yss/w000000000689.txt" ); // ��¸��w*.txt���ɤ߹��ࡣ*.caffemodel�򲿤��ɤ߹������� @@ -4087,7 +4309,8 @@ void start_zero_train(int *p_argc, char ***p_argv ) int iter_weight = 0; wait_again: - if ( fReplayLearning ) { + if ( GCT_SELF ) { + } else if ( fReplayLearning ) { add = PS->add_a_little_from_archive(); if ( add < 0 ) { PRT("done..\n"); solver->Snapshot(); return; } if ( zdb_count <= ZERO_DB_SIZE ) goto wait_again; @@ -4095,18 +4318,16 @@ void start_zero_train(int *p_argc, char ***p_argv ) // if ( zdb_count > 13000000 ) { PRT("done...\n"); solver->Snapshot(); return; } // if ( iteration >= 100000*1 ) { PRT("done...\n"); solver->Snapshot(); return; } // if ( iteration > 1000 ) solver_param.set_base_lr(0.01); - } else { - if ( 1 && iteration==0 && next_weight_number==3884 ) { + if ( 0 && iteration==0 && next_weight_number==1170 ) { add = 200; // ���Τߥ��ߡ���10000�����ɲä������Ȥˤ��� } else { add = PS->wait_and_get_new_kif(next_weight_number); } } - const float ADJUST = 1.07142857; const int AVE_MOVES = 128; // 1�ɤ�ʿ�Ѽ�� - float add_mul = ADJUST * (float)AVE_MOVES / MINI_BATCH; + float add_mul = (float)AVE_MOVES / MINI_BATCH; int nLoop = (int)((float)add*add_mul); // MB=64��add*2, MB=128��add*1, MB=180��add*0.711 // (ITER_SIZE*MINI_BATCH)=4096 �ʤ����Ǥ�32����ɬ��(32*128=4096) int min_n = ITER_SIZE*MINI_BATCH / AVE_MOVES; @@ -4116,7 +4337,7 @@ void start_zero_train(int *p_argc, char ***p_argv ) remainder = add - nLoop * min_n; } - const int ITER_WEIGHT_BASE = 32000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000����(ʿ��128��)���Ȥ�weight����� + const int ITER_WEIGHT_BASE = 10000*AVE_MOVES / (ITER_SIZE*MINI_BATCH); // 10000����(ʿ��128��)���Ȥ�weight����� int iter_weight_limit = ITER_WEIGHT_BASE; float reduce = 1.0; // weight��10000���褴�Ȥdzؽ������10000����8000�ʤɤ˸��餹����������®�٤�®�����뤿�� FILE *fp = fopen("reduce.txt","r"); @@ -4134,9 +4355,15 @@ void start_zero_train(int *p_argc, char ***p_argv ) fclose(fp); } -//nLoop /= 2; +//nLoop /= 4; +//nLoop *= 0.602; // *= 2.66 ... 800000 iteration / ((600000 kifu/ 2000) * 1000 Loop) = 2.66 +//nLoop = (int)((float)nLoop * 0.029755f); +//nLoop = (int)((float)nLoop * 0.72727f); +//nLoop = (int)((float)nLoop * 0.701); + if ( GCT_SELF ) nLoop = 800000*1; +nLoop = 800000*1; - PRT("nLoop=%d,add=%d,add_mul=%.3f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,iter_weight,iter_weight_limit); + PRT("nLoop=%d,add=%d,add_mul=%.3f,MINI_BATCH=%d,kDataSize=%d,remainder=%d,iteration=%d(%d/%d),rand=%lu/%lu(%lf)\n",nLoop,add,add_mul,MINI_BATCH,kDataSize,remainder,iteration,iter_weight,iter_weight_limit, rand_try,rand_batch,(double)rand_try/(double)rand_batch); int loop; for (loop=0;loop input_data; // �礭���Τ�static�� @@ -4183,7 +4410,9 @@ void start_zero_train(int *p_argc, char ***p_argv ) } } -// solver->Snapshot(); +return; + if ( GCT_SELF ) return; goto wait_again; } #endif + diff --git a/learn/yss_dcnn.h b/learn/yss_dcnn.h index 4a86b71..6e2088d 100644 --- a/learn/yss_dcnn.h +++ b/learn/yss_dcnn.h @@ -41,6 +41,8 @@ const int MOVE_C_Y_X_ID_MAX = 3781; const int MOVE_C_Y_X_ID_MAX = 11259; // 3781; #endif +#define GCT_SELF 0 // GCT�δ��� selfplay_gct-???.hcpe3.xz ��Ȥ���硣 https://tadaoyamaoka.hatenablog.com/entry/2021/05/06/223701 + typedef struct ZERO_DB { uint64 hash; // ����򼨤��ϥå��� uint64 date; // ���������(��������Τ�����) @@ -50,6 +52,9 @@ typedef struct ZERO_DB { int result_type;// ��λ�������ꡢ����(513��)�����������Ϣ³����β�ƨ���ˤ��ȿ§���� int moves; // ���(����Υ�������Ʊ��) int handicap; // ����� +#if ( GCT_SELF==1) + vector v_init_pos; // ���϶��̡ܼ��� 81+7*2+1���ϥեޥ��256bit(32byte)��ɽ���Ǥ���Τǰ��̤ϲ�ǽ +#endif vector v_kif; // ���� vector v_playouts_sum; // Root��õ�������̾��800���� vector < vector > vv_move_visit; // (��+������)�Υڥ� diff --git a/learn/yss_misc.cpp b/learn/yss_misc.cpp index 5c108dd..6504d0f 100644 --- a/learn/yss_misc.cpp +++ b/learn/yss_misc.cpp @@ -246,12 +246,18 @@ int shogi::get_handicap_from_board() void shogi::ban_saikousei() { clear_kb_kiki_kn(); - init(); /** kn[] ni kakikomu **/ allkaku(); tesuu = all_tesuu = 0; } +void shogi::ban_saikousei_without_kiki() +{ + clear_kb_kiki_kn(); + init(); /** kn[] ni kakikomu **/ + tesuu = all_tesuu = 0; +} + // ʿ������̤� int shogi::is_hirate_ban() { diff --git a/src/autousi/play.cpp b/src/autousi/play.cpp index b1398e1..d16f85f 100644 --- a/src/autousi/play.cpp +++ b/src/autousi/play.cpp @@ -475,23 +475,31 @@ class USIEngine : public Child { float value = strtof(str_value+2, &endptr); if (endptr == str_value+2 || *endptr != '\0' || value < 0.0f || value == HUGE_VALF) - die(ERR_INT("cannot interpret value %s (engine %s)", - str_value+2, get_fp())); - if (value < th_resign) flag_resign = true; + die(ERR_INT("cannot interpret value %s (engine %s)", str_value+2, get_fp())); + if (value < th_resign) flag_resign = true; // if (value < th_resign && _nmove > 30) flag_resign = true; + str_value = OSI::strtok(nullptr, " ,", &saveptr); + if (!str_value || str_value[0] != 'r' || str_value[1] != '=') + die(ERR_INT("cannot read raw value (engine %s)", get_fp())); + float raw_value = strtof(str_value+2, &endptr); + if (endptr == str_value+2 || *endptr != '\0' || raw_value < 0.0f + || raw_value == HUGE_VALF) + die(ERR_INT("cannot interpret raw_value %s (engine %s)", str_value+2, get_fp())); + + const char *str_count = OSI::strtok(nullptr, " ,", &saveptr); if (!str_count) die(ERR_INT("cannot read count (engine %s)", get_fp())); - + long int num = strtol(str_count, &endptr, 10); if (endptr == str_count || *endptr != '\0' || num < 1 || num == LONG_MAX) die(ERR_INT("cannot interpret visit count %s (engine %s)", str_count, get_fp())); - + num_best = num; { char buf[256]; - sprintf(buf, "v=%.3f,%ld", value, num); + sprintf(buf, "v=%.3f,r=%.3f,%ld", value, raw_value, num); new_info += buf; } @@ -505,20 +513,22 @@ class USIEngine : public Child { die(ERR_INT("bad candidate %s (engine %s)", str_move_usi, get_fp())); new_info += ","; new_info += action.to_str(SAux::csa); - + str_count = OSI::strtok(nullptr, " ,", &saveptr); if (!str_count) die(ERR_INT("cannot read count (engine %s)", get_fp())); num = strtol(str_count, &endptr, 10); - if (endptr == str_count || *endptr != '\0' + char c = *endptr; + bool hasPolicy = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); + if (endptr == str_count || hasPolicy == false || num < 1 || num == LONG_MAX) die(ERR_INT("cannot interpret a visit count %s (engine %s)", str_count, get_fp())); num_tot += num; new_info += ","; - new_info += to_string(num); } } + new_info += to_string(num) + c; } } if (num_best < num_tot) die(ERR_INT("bad counts (engine %s)", get_fp())); _node.take_action(actionPlay); diff --git a/src/common/param.hpp b/src/common/param.hpp index 35c5edd..aeb1afc 100644 --- a/src/common/param.hpp +++ b/src/common/param.hpp @@ -3,9 +3,9 @@ #pragma once namespace Ver { constexpr unsigned char major = 3; // 2...komaochi, 3...Swish - constexpr unsigned char minor = 6; // + constexpr unsigned char minor = 7; // // usi_engine is no use. MUST increase "minor" for kicking old engine by server. Only major and minor are sent to client. - constexpr unsigned short usi_engine = 37; // 1...18 AobaZero, 16...26 komaochi, 27...Swish AobaZero + constexpr unsigned short usi_engine = 39; // 1...18 AobaZero, 16...26 komaochi, 27...Swish AobaZero } #define AOBA_UNIQUE ".oeWK7ZhnLN" diff --git a/src/playshogi/playshogi.cpp b/src/playshogi/playshogi.cpp index bf5e573..2ac6c22 100644 --- a/src/playshogi/playshogi.cpp +++ b/src/playshogi/playshogi.cpp @@ -442,14 +442,21 @@ static void node_update(USIEngine &myself, USIEngine &opponent, char *endptr; float value = strtof(str_value+2, &endptr); if (endptr == str_value+2 || *endptr != '\0' || value < 0.0f || value == HUGE_VALF) die(ERR_INT("cannot interpret value %s (engine)", str_value+2)); + + str_value = strtok(nullptr, " ,"); + if (!str_value || str_value[0] != 'r' || str_value[1] != '=') die(ERR_INT("cannot read raw value %s (engine)",str_value)); + float raw_value = strtof(str_value+2, &endptr); + if (endptr == str_value+2 || *endptr != '\0' || raw_value < 0.0f || raw_value == HUGE_VALF) die(ERR_INT("cannot interpret raw_value %s (engine)", str_value+2)); + const char *str_count = strtok(nullptr, " ,"); if (!str_count) die(ERR_INT("cannot read count (engine)")); + long int num = strtol(str_count, &endptr, 10); if (endptr == str_count || *endptr != '\0' || num < 1 || num == LONG_MAX) die(ERR_INT("cannot interpret visit count %s (engine)", str_count)); int num_all = num; { char buf[256]; - sprintf(buf, "v=%.3f,%ld", value, num); + sprintf(buf, "v=%.3f,r=%.3f,%ld", value, raw_value, num); new_info += buf; } // read candidate moves @@ -470,11 +477,13 @@ static void node_update(USIEngine &myself, USIEngine &opponent, if (!str_count) die(ERR_INT("cannot read count (engine)")); num = strtol(str_count, &endptr, 10); - if (endptr == str_count || *endptr != '\0' || num < 1 || num == LONG_MAX) die(ERR_INT("cannot interpret a visit count %s (engine)", str_count)); + char c = *endptr; + bool hasPolicy = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); + if (endptr == str_count || hasPolicy == false || num < 1 || num == LONG_MAX) die(ERR_INT("cannot interpret a visit count %s (engine)", str_count)); num_tot += num; new_info += ","; - new_info += to_string(num); + new_info += to_string(num) + c; } if (num_all < num_tot) die(ERR_INT("bad counts (engine)")); diff --git a/src/server/datakeep.cpp b/src/server/datakeep.cpp index 9d465ec..2067f82 100644 --- a/src/server/datakeep.cpp +++ b/src/server/datakeep.cpp @@ -243,6 +243,14 @@ examine_record(const char *rec, size_t len_rec, uint64_t &digest, else { if (value < value_min_white) value_min_white = value; } + token = strtok_r(nullptr, ",\'", &saveptr_token); + if (!token || token[0] != 'r' || token[1] != '=') return false; + token += 2; + float raw_value = strtof(token, &endptr); + if (endptr == token || *endptr != '\0' || raw_value < 0.0f + || raw_value == HUGE_VALF) return false; + + token = strtok_r(nullptr, ",\'", &saveptr_token); if (!token) return false; @@ -260,7 +268,9 @@ examine_record(const char *rec, size_t len_rec, uint64_t &digest, token = strtok_r(nullptr, ",\'", &saveptr_token); if (!token) return false; num = strtol(token, &endptr, 10); - if (endptr == token || *endptr != '\0' || num < 1 || num == LONG_MAX) + char c = *endptr; + bool hasPolicy = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z'); + if (endptr == token || hasPolicy == false || num < 1 || num == LONG_MAX) return false; tot_nchild += 1U; } diff --git a/src/usi-engine/bona/shogi.h b/src/usi-engine/bona/shogi.h index eb09c37..549ee1c 100644 --- a/src/usi-engine/bona/shogi.h +++ b/src/usi-engine/bona/shogi.h @@ -135,7 +135,8 @@ extern unsigned char ailast_one[512]; //#define BNZ_VER "35" // 20220430 perpetual check bug fixed(again). //#define BNZ_VER "36" // 20220626 pawn ,rook, bishop are always promoted. discovered attack moves have 30% of best policy. safe LCB, kldgain 0.000005. //#define BNZ_VER "37" // 20220626 kldgain 000000075. ave playouts is 1568/move. -#define BNZ_VER "38" // 20221110 test get_best_move_alphabeta_usi(). +//#define BNZ_VER "38" // 20221110 test get_best_move_alphabeta_usi(). +#define BNZ_VER "39" // 20221221 raw value and policy are recorded in *.csa. Dynamic Variance-Scaled cPUCT. NN is not called for one reply king escape position. #define BNZ_NAME "AobaZero" //#define BNZ_VER "16" // 20210528 komaochi, mate3 diff --git a/src/usi-engine/bona/ysszero.cpp b/src/usi-engine/bona/ysszero.cpp index 7cfaf91..b3f02a2 100644 --- a/src/usi-engine/bona/ysszero.cpp +++ b/src/usi-engine/bona/ysszero.cpp @@ -50,8 +50,9 @@ bool fLCB = true; double MinimumKLDGainPerNode = 0; //0.000002; 0で無効, lc0は 0.000005 bool fResetRootVisit = false; bool fDiffRootVisit = false; -bool fSkipOneReply = false; // 王手を逃げる手が1手の局面は評価せずに木を降りる +bool fSkipOneReply = true; // 王手を逃げる手が1手の局面は評価せずに木を降りる bool fSkipKingCheck = false; // 王手がかかってる局面では評価せずに木を降りる +bool fRawValuePolicy = true; int nLimitUctLoop = 100; double dLimitSec = 0; @@ -253,7 +254,7 @@ int is_drop_pawn_mate(tree_t * restrict ptree, int turn, int ply) return 1; } -const int USI_BESTMOVE_LEN = MAX_LEGAL_MOVES*(8+5)+10; +const int USI_BESTMOVE_LEN = MAX_LEGAL_MOVES*(8+1+5)+10+10; int YssZero_com_turn_start( tree_t * restrict ptree ) { @@ -908,9 +909,7 @@ int uct_search_start(tree_t * restrict ptree, int sideToMove, int ply, char *buf } const bool fPolicyRealization = false; float keep_root_policy[MAX_LEGAL_MOVES]; - if ( fPolicyRealization ) { - for (int i=0; ichild_num; i++) keep_root_policy[i] = phg->child[i].bias; - } + for (int i=0; ichild_num; i++) keep_root_policy[i] = phg->child[i].bias; const float epsilon = 0.25f; // epsilon = 0.25 const float alpha = 0.15f; // alpha ... Chess = 0.3, Shogi = 0.15, Go = 0.03 @@ -1104,7 +1103,11 @@ int uct_search_start(tree_t * restrict ptree, int sideToMove, int ply, char *buf best_v01 = (pbest->value + 1.0) / 2.0; // -1 <= x <= +1 --> 0 <= x <= +1 } if ( exact_v != ILLEGAL_MOVE ) best_v01 = exact_v; - sprintf(buf_move_count,"v=%.04f,%d",best_v01,sum_games); + if ( fRawValuePolicy ) { + sprintf(buf_move_count,"v=%.04f,r=%.04f,%d",best_v01,(phg->net_value + 1.0) / 2.0,sum_games); + } else { + sprintf(buf_move_count,"v=%.04f,%d",best_v01,sum_games); + } } else { sprintf(buf_move_count,"%d",sum_games); } @@ -1115,7 +1118,51 @@ int uct_search_start(tree_t * restrict ptree, int sideToMove, int ply, char *buf if ( 0 ) strcpy(buf,str_CSA_move(p->move)); // PRT("%s,%d,",str_CSA_move(p->move),p->games); char str[TMP_BUF_LEN]; - sprintf(str,",%s,%d",buf,p->games); + if ( fRawValuePolicy ) { +// static int p_count[301],p_sum = 0; + int j; + for (j=0;jchild_num;j++) { + if ( p->move != phg->child[j].move ) continue; + float f = keep_root_policy[j]; + int min_c = -1; + float min_diff = 10.0; + int c; + for (c=0;c<52;c++) { + double k = 1.144; // 52段階で0.001から0.99 までに分布するように + double min = 0.001; + double p = pow(k,c) * min; + if ( fabs(p - f) < min_diff ) { min_c = c; min_diff = fabs(p - f); } +// PRT("%d:p=%f\n",c,p); + } + if ( min_c < 0 ) DEBUG_PRT(""); + c = 'A' + min_c + (min_c >= 26)*6; // A=0.001,Z=0.028,a=0.033,z=0.954 +// sprintf(str,",%s,%d,%.3f",buf,p->games,f); + sprintf(str,",%s,%d%c",buf,p->games,c); +/* + if ( f >= 0.01 ) p_count[(int)(f*100)]++; + else if ( f >= 0.001 ) p_count[(int)(100+f*1000)]++; + else if ( f >= 0.0001 ) p_count[(int)(200+f*10000)]++; + else p_count[300]++; + p_sum++; +*/ + break; + } + if ( j == phg->child_num ) DEBUG_PRT(""); +/* + if ( (p_sum%10)==0 && p_sum>0 ) { + PRT("\np_sum=%d\n",p_sum); + for (int j=0;j<301;j++) { + PRT("%d,",p_count[j]); + } + PRT("\n"); + for (int j=0;j<301;j++) { + PRT("%.4f,",(float)p_count[j]/p_sum); + } + } +*/ + } else { + sprintf(str,",%s,%d",buf,p->games); + } strcat(buf_move_count,str); // PRT("%s",str); } @@ -1389,8 +1436,8 @@ if (0) { phg->child_num = move_num; if ( fSkipOneReply ) { - static int count, all; all++; - if ( move_num==1 ) PRT("move_num=1,ply=%d,%d/%d\n",ply,++count,all); +// static int count, all; all++; +// if ( move_num==1 ) PRT("move_num=1,ply=%d,%d/%d\n",ply,++count,all); } if ( NOT_USE_NN ) { @@ -1429,8 +1476,8 @@ if (0) { // get_network_policy_value() は常に先手が勝で(+1)、先手が負けで(-1)を返す。sideToMove は無関係 v = -1; if ( sideToMove==white ) v = +1; // 後手番で可能手がないなら先手の勝 - } else if ( fSkipKingCheck && InCheck(sideToMove) ) { - v = 0; +// } else if ( fSkipKingCheck && InCheck(sideToMove) ) { // Policyは必要か +// v = 0; } else { v = get_network_policy_value(ptree, sideToMove, ply, phg); @@ -1716,19 +1763,32 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply, int *pExactVal } double cINIT = 1.25; - const bool fDynamicPUCT = false; - if ( fDynamicPUCT ) { // Dynamic Variance-Scaled cPUCT + const bool fDynamicPUCT = true; + if ( fDynamicPUCT ) { // Dynamic Variance-Scaled cPUCT https://github.com/lightvector/KataGo/blob/master/docs/KataGoMethods.md#dynamic-variance-scaled-cpuct int visits = phg->games_sum; if (visits >= 2) { +// static double stddev_sum = 0; +// static double stddev_min = 1; +// static double stddev_max = 0; +// static int count = 0; float eval_variance = visits > 1 ? phg->squared_eval / (visits - 1) : 1.0f; auto stddev = std::sqrt(eval_variance / visits); - double k = sqrt(stddev) * 4.0;//stddev * 5.0; //sqrt(stddev) * 4.0; - if ( k == 0 ) k = 1.0; - cINIT *= k;// += k - 0.2; // *= k; - PRT("%3d:%2d:games=%4d,mean=%7.4f,squared_eval=%8.2f,stddev=%7.4f,%7.4f,cINIT=%7.4f\n",ptree->nrep,ply,visits,phg->win_sum / visits,phg->squared_eval,stddev,k,cINIT); + double k = sqrt(stddev) * 4.0; //stddev * 5.0; // 根拠なしの式 +// const double AVG_STDDEV = 0.024; // 0.031; +// double k = stddev / AVG_STDDEV; + if ( k < 0.5 ) k = 0.5; + if ( k > 1.4 ) k = 1.4; + if ( 0 && ply==1 ) k = 1.0; + double a = 1.0f / (1.0f+sqrt((double)visits/10000.0f)); // 1 -> 0 + k = a*k + (1.0f-a)*1.0f; // visitsが大きいと1に近づく + cINIT *= k; +// stddev_sum += stddev; +// if ( stddev > stddev_max ) stddev_max = stddev; +// if ( stddev < stddev_min ) stddev_min = stddev; +// count++; +// PRT("%3d:%2d:games=%4d,mean=%7.4f,squared_eval=%6.2f,stddev=%7.4f,%7.4f,cINIT=%7.4f,ave_stddev=%6.4f(%6.4f,%6.4f)\n",ptree->nrep,ply,visits,phg->win_sum / visits,phg->squared_eval,stddev,k,cINIT,stddev_sum/count,stddev_min,stddev_max); } } - if ( 0 && ply==1 ) cINIT = 1.30; /* double c_bias[MAX_LEGAL_MOVES]; if (1) { @@ -1964,7 +2024,15 @@ double uct_tree(tree_t * restrict ptree, int sideToMove, int ply, int *pExactVal if ( fSkipKingCheck && now_in_check ) { down_tree = 1; } - + if (0) { + static int count; + if ( count++ >= 1 ) { + count = 0; + } else { + down_tree = 1; // 無理やり1手先で評価する(1 thread でのテスト) + } + } + win = -phg2->net_value; UnLock(phg2->entry_lock); From e484dc7772219a4e7377f2e61ce63f39a1b73e42 Mon Sep 17 00:00:00 2001 From: yssaya Date: Wed, 21 Dec 2022 16:20:23 +0900 Subject: [PATCH 7/7] for Windows. --- src/usi-engine/bona/pipe.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/usi-engine/bona/pipe.cpp b/src/usi-engine/bona/pipe.cpp index 86de232..8df1ddb 100644 --- a/src/usi-engine/bona/pipe.cpp +++ b/src/usi-engine/bona/pipe.cpp @@ -1,5 +1,6 @@ // 2022 Team AobaZero // This source code is in the public domain. +#include "../config.h" #include #include #include @@ -23,10 +24,13 @@ #include #include #include +#ifdef _WIN32 +#else #include #include #include #include +#endif const int CHILD_MAX = 1; int pid_child[CHILD_MAX]; @@ -48,7 +52,10 @@ void kill_usi_child() { for (i=0; i