-
Notifications
You must be signed in to change notification settings - Fork 117
/
uct.c
1644 lines (1441 loc) · 54.8 KB
/
uct.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#define DEBUG
#include "debug.h"
#include "pachi.h"
#include "board.h"
#include "gtp.h"
#include "chat.h"
#include "move.h"
#include "mq.h"
#include "joseki/joseki.h"
#include "playout.h"
#include "playout/moggy.h"
#include "playout/light.h"
#include "tactics/util.h"
#include "timeinfo.h"
#include "uct/prior.h"
#include "uct/plugins.h"
#include "uct/internal.h"
#include "uct/search.h"
#include "uct/tree.h"
#include "uct/dynkomi.h"
#include "uct/uct.h"
#include "uct/walk.h"
#include "josekifix/josekifix.h"
#include "dcnn/dcnn.h"
#ifdef DISTRIBUTED
#include "uct/slave.h"
#endif
uct_policy_t *policy_ucb1_init(uct_t *u, char *arg);
uct_policy_t *policy_ucb1amaf_init(uct_t *u, char *arg, board_t *board);
static void uct_pondering_start(uct_t *u, board_t *b0, tree_t *t, enum stone color, coord_t our_move, int flags);
static void uct_genmove_pondering_save_replies(uct_t *u, board_t *b, enum stone color, coord_t next_move);
static void uct_genmove_pondering_start(uct_t *u, board_t *b, enum stone color, coord_t our_move);
/* Maximal simulation length. */
#define MC_GAMELEN MAX_GAMELEN
static void
setup_state(uct_t *u, board_t *b, enum stone color)
{
size_t size = u->tree_size;
if (DEBUGL(3)) fprintf(stderr, "allocating %i Mb for search tree\n", (int)(size / (1024*1024)));
u->main_board = b;
u->t = tree_init(color, size, stats_hbits(u));
if (u->initial_extra_komi)
u->t->extra_komi = u->initial_extra_komi;
if (u->force_seed)
fast_srandom(u->force_seed);
if (UDEBUGL(3))
fprintf(stderr, "Fresh board with random seed %lu\n", fast_getseed());
if (!u->no_tbook && b->moves == 0) {
if (color == S_BLACK) {
tree_load(u->t, b);
} else if (DEBUGL(0)) {
fprintf(stderr, "Warning: First move appears to be white\n");
}
}
}
static void
reset_state(uct_t *u)
{
if (UDEBUGL(3)) fprintf(stderr, "resetting tree\n");
assert(u->t);
tree_done(u->t);
u->t = NULL;
u->main_board = NULL;
}
static void
setup_dynkomi(uct_t *u, board_t *b, enum stone to_play)
{
if (u->t->use_extra_komi && !pondering(u) && u->dynkomi->permove)
u->t->extra_komi = u->dynkomi->permove(u->dynkomi, b, u->t);
else if (!u->t->use_extra_komi)
u->t->extra_komi = 0;
}
void
uct_prepare_move(uct_t *u, board_t *b, enum stone color)
{
if (u->t) { /* Verify that we have sane state. */
assert(u->t && b->moves);
assert(node_coord(u->t->root) == last_move(b).coord);
assert(u->t->root_color == last_move(b).color);
if (color != stone_other(u->t->root_color))
die("Fatal: Non-alternating play detected %d %d\n", color, u->t->root_color);
#ifdef DISTRIBUTED
uct_htable_reset(u->t);
#endif
} else /* We need fresh state. */
setup_state(u, b, color);
ownermap_init(&u->ownermap);
u->allow_pass = (b->moves > board_earliest_pass(b)); /* && dames < 10 if using patterns */
#ifdef DISTRIBUTED
u->played_own = u->played_all = 0;
#endif
}
static bool pass_is_safe(uct_t *u, board_t *b, enum stone color, bool pass_all_alive, char **msg, bool log,
move_queue_t *dead, move_queue_t *dead_extra, move_queue_t *unclear,
bool unclear_kludge, char *label);
static bool pass_is_safe_(uct_t *u, board_t *b, enum stone color, bool pass_all_alive, char **msg,
move_queue_t *dead, move_queue_t *unclear, bool unclear_kludge);
/* Does the board look like a final position, and do we win counting ?
* (if allow_losing_pass was set we don't care about score)
* If true, returns dead groups used to evaluate position in @dead
* (possibly guessed if there are unclear groups). */
bool
uct_pass_is_safe(uct_t *u, board_t *b, enum stone color, bool pass_all_alive,
move_queue_t *dead, char **msg, bool log)
{
mq_init(dead);
/* Check this early, no need to go through the whole thing otherwise. */
*msg = "too early to pass";
if (b->moves < board_earliest_pass(b))
return false;
/* Must be thread-safe inasmuch as this also gets called from the main thread
* through uct policy choose() (uct_progress_status(), uct_search_check_stop())
* and main board may not be changed even for a split second without risking
* having another thread grab it in an invalid state.
* board_position_final() uses with_move() so copy board first. */
board_t b2;
if (b == u->main_board) { board_copy(&b2, b); b = &b2; }
/* Make sure enough playouts are simulated to get a reasonable dead group list. */
move_queue_t dead_orig;
move_queue_t unclear_orig;
uct_mcowner_playouts(u, b, color);
ownermap_dead_groups(b, &u->ownermap, &dead_orig, &unclear_orig);
#define init_pass_is_safe_groups() do { \
unclear = unclear_orig; \
*dead = dead_orig; \
mq_init(&dead_extra); \
} while (0)
move_queue_t unclear;
move_queue_t dead_extra;
init_pass_is_safe_groups();
if (DEBUGL(2) && log && unclear.moves) mq_print_line("unclear groups: ", &unclear);
/* Guess unclear groups ?
* By default Pachi is fairly pedantic at the end of the game and will
* refuse to pass until everything is nice and clear to him. This can
* take some moves depending on the situation if there are unclear
* groups. Guessing allows more user-friendly behavior, passing earlier
* without having to clarify everything. Under japanese rules this can
* also prevent him from losing the game if clarifying would cost too
* many points.
* Even though Pachi will only guess won positions there is a possibility
* of getting dead group status wrong, so only ok if game setup asks
* players for dead stones and game can resume in case of disagreement
* (auto-scored games like on ogs for example are definitely not ok).
* -> Only enabled if user asked for it or playing japanese on kgs
* (for chinese don't risk screwing up and ending up in cleanup phase) */
bool guess_unclear_ok = pachi_options()->guess_unclear_groups;
if (pachi_options()->kgs)
guess_unclear_ok = (b->rules == RULES_JAPANESE);
/* smart pass: try worst-case scenario first:
* own unclear groups are dead and opponent's are alive.
* If we still win this way for sure it's ok. */
if (guess_unclear_ok && unclear.moves) {
for (int i = 0; i < unclear.moves; i++)
if (board_at(b, unclear.move[i]) == color)
mq_add(&dead_extra, unclear.move[i], 0); /* own groups -> dead */
unclear.moves = 0; /* opponent's groups -> alive */
if (pass_is_safe(u, b, color, pass_all_alive, msg, log,
dead, &dead_extra, &unclear, true, "(worst-case)"))
return true;
init_pass_is_safe_groups(); /* revert changes */
}
/* smart pass: brute force it then. try any combination that works. */
if (guess_unclear_ok && unclear.moves && unclear.moves < 10) {
int n = (1 << unclear_orig.moves);
for (int k = 0; k < n; k++) {
mq_init(&unclear); /* all unpicked groups -> alive */
for (int i = 0; i < unclear_orig.moves; i++)
if (!(k & (1 << i)))
mq_add(&dead_extra, unclear_orig.move[i], 0); /* picked groups -> dead */
if (pass_is_safe(u, b, color, pass_all_alive, msg, log,
dead, &dead_extra, &unclear, true, ""))
return true;
init_pass_is_safe_groups(); /* revert changes */
}
return false;
}
/* Strict mode: don't pass until everything is clarified. */
return pass_is_safe(u, b, color, pass_all_alive, msg, log,
dead, &dead_extra, &unclear, false, "");
}
static bool
pass_is_safe(uct_t *u, board_t *b, enum stone color, bool pass_all_alive, char **msg, bool log,
move_queue_t *dead, move_queue_t *dead_extra, move_queue_t *unclear,
bool unclear_kludge, char *label)
{
move_queue_t guessed; guessed = *dead_extra;
mq_append(dead, dead_extra);
bool r = pass_is_safe_(u, b, color, pass_all_alive, msg, dead, unclear, unclear_kludge);
/* smart pass: log guessed unclear groups if successful (DEBUGL(2)) */
if (unclear_kludge && log && r && DEBUGL(2) && !DEBUGL(3)) {
mq_print("pass ok assuming dead: ", &guessed);
fprintf(stderr, "%s\n", label);
}
/* smart pass: log failed attempts as well (DEBUGL(3)) */
if (unclear_kludge && log && DEBUGL(3)) { /* log everything */
fprintf(stderr, " pass %s ", (r ? "ok" : "no"));
int n = 0;
n += mq_print("assuming dead: ", &guessed);
fprintf(stderr, "%*s -> %-7s %s %s\n", abs(50-n), "",
board_official_score_str(b, dead), (r ? "" : *msg), label);
}
return r;
}
static bool
pass_is_safe_(uct_t *u, board_t *b, enum stone color, bool pass_all_alive, char **msg,
move_queue_t *dead, move_queue_t *unclear, bool unclear_kludge)
{
bool check_score = !u->allow_losing_pass;
if (pass_all_alive) { /* kgs chinese rules cleanup phase */
*msg = "need to remove opponent dead groups first";
for (int i = 0; i < dead->moves; i++)
if (board_at(b, dead->move[i]) == stone_other(color))
return false;
dead->moves = 0; // our dead stones are alive when pass_all_alive is true
float final_score = board_official_score_color(b, dead, color);
*msg = "losing on official score";
return (check_score ? final_score >= 0 : true);
}
int final_ownermap[board_max_coords(b)];
int dame, seki;
floating_t final_score = board_official_score_details(b, dead, &dame, &seki, final_ownermap, &u->ownermap);
if (color == S_BLACK) final_score = -final_score;
floating_t score_est;
if (unclear_kludge)
score_est = final_score; /* unclear groups, can't trust score est ... */
else {
/* Check score estimate first, official score is off if position is not final */
*msg = "losing on score estimate";
score_est = ownermap_score_est_color(b, &u->ownermap, color);
if (check_score && score_est < 0) return false;
}
/* Don't go to counting if position is not final. */
if (!board_position_final_full(b, &u->ownermap, dead, unclear, score_est,
final_ownermap, dame, final_score, msg))
return false;
*msg = "losing on official score";
return (check_score ? final_score >= 0 : true);
}
static void
uct_board_print(engine_t *e, board_t *b, FILE *f)
{
uct_t *u = (uct_t*)e->data;
board_print_ownermap(b, f, (u ? &u->ownermap : NULL));
}
/* Fill ownermap for mcowner pattern feature (no tree search)
* ownermap must be initialized already. */
void
uct_mcowner_playouts(uct_t *u, board_t *b, enum stone color)
{
playout_setup_t ps = playout_setup(u->gamelen, u->mercymin);
/* TODO pick random last move, better playouts randomness */
while (u->ownermap.playouts < GJ_MINGAMES) {
board_t b2;
board_copy(&b2, b);
playout_play_game(&ps, &b2, color, NULL, &u->ownermap, u->playout);
board_done(&b2);
}
}
static ownermap_t*
uct_ownermap(engine_t *e, board_t *b)
{
uct_t *u = (uct_t*)e->data;
/* Make sure ownermap is well-seeded. */
uct_mcowner_playouts(u, b, board_to_play(b));
return &u->ownermap;
}
static char *
uct_notify_play(engine_t *e, board_t *b, move_t *m, char *enginearg, bool *print_board)
{
uct_t *u = (uct_t*)e->data;
bool was_searching = thread_manager_running;
if (!u->t) {
/* No state, create one - this is probably game beginning
* and we need to load the opening tbook right now. */
uct_prepare_move(u, b, m->color);
assert(u->t);
}
/* Stop pondering, required by tree_promote_move() */
uct_pondering_stop(u);
if (u->slave && was_searching && m->color == u->my_color) {
if (DEBUGL(1) && debug_boardprint) *print_board = true;
if (UDEBUGL(3)) tree_dump(u->t, u->dumpthres);
}
if (is_resign(m->coord)) { /* Reset state. */
reset_state(u);
return NULL;
}
/* Save best replies before resetting tree (dcnn pondering). */
if (u->slave && u->pondering_opt)
uct_genmove_pondering_save_replies(u, b, m->color, m->coord);
/* Promote node of the appropriate move to the tree root.
* If using dcnn, only promote node if it has dcnn priors:
* Direction of tree search is heavily influenced by initial priors,
* if we started searching without dcnn data better start from scratch. */
enum promote_reason reason;
assert(u->t->root);
if (!tree_promote_move(u->t, m, b, &reason)) {
if (UDEBUGL(3)) {
if (reason == PROMOTE_UNTRUSTWORTHY) fprintf(stderr, "Not promoting move node in untrustworthy tree.\n");
else if (reason == PROMOTE_DCNN_MISSING) fprintf(stderr, "Played move has no dcnn priors, resetting tree.\n");
else fprintf(stderr, "Warning: Cannot promote move node! Several play commands in row?\n");
}
/* Preserve dynamic komi information, though, that is important. */
u->initial_extra_komi = u->t->extra_komi;
reset_state(u);
}
/* If we are a slave in a distributed engine, start pondering once
* we know which move we actually played. See uct_genmove() about
* the check for pass. */
if (u->slave && u->pondering_opt && m->color == u->my_color && !is_pass(m->coord))
uct_genmove_pondering_start(u, b, m->color, m->coord);
return NULL;
}
static char *
uct_result(engine_t *e, board_t *b)
{
uct_t *u = (uct_t*)e->data;
static char reply[1024];
if (!u->t)
return NULL;
enum stone color = u->t->root_color;
tree_node_t *n = u->t->root;
snprintf(reply, 1024, "%s %s %d %.2f %.1f",
stone2str(color), coord2sstr(node_coord(n)),
n->u.playouts, tree_node_get_value(u->t, -1, n->u.value),
u->t->use_extra_komi ? u->t->extra_komi : 0);
return reply;
}
static char *
uct_chat(engine_t *e, board_t *b, bool opponent, char *from, char *cmd)
{
uct_t *u = (uct_t*)e->data;
if (!u->t)
return generic_chat(b, opponent, from, cmd, S_NONE, pass, 0, 1, u->threads, 0.0, 0.0, "");
tree_node_t *n = u->t->root;
double winrate = tree_node_get_value(u->t, -1, n->u.value);
double extra_komi = u->t->use_extra_komi && fabs(u->t->extra_komi) >= 0.5 ? u->t->extra_komi : 0;
char *score_est = ownermap_score_est_str(b, &u->ownermap);
return generic_chat(b, opponent, from, cmd, u->t->root_color, node_coord(n), n->u.playouts, 1,
u->threads, winrate, extra_komi, score_est);
}
static void
uct_dead_groups(engine_t *e, board_t *b, move_queue_t *dead)
{
uct_t *u = (uct_t*)e->data;
/* This means the game is probably over, no use pondering on. */
uct_pondering_stop(u);
if (u->pass_all_alive)
return; // no dead groups
/* Normally last genmove was a pass and we've already figured out dead groups.
* Don't recompute dead groups here, result could be different this time and
* lead to bad result ! */
if (u->pass_moveno == b->moves || u->pass_moveno == b->moves - 1) {
memcpy(dead, &u->dead_groups, sizeof(*dead));
return;
}
if (UDEBUGL(1)) fprintf(stderr, "WARNING: Recomputing dead groups\n");
/* Make sure the ownermap is well-seeded. */
uct_mcowner_playouts(u, b, S_BLACK);
if (UDEBUGL(2)) board_print_ownermap(b, stderr, &u->ownermap);
ownermap_dead_groups(b, &u->ownermap, dead, NULL);
}
static void
uct_stop(engine_t *e)
{
/* This is called on game over notification. However, an undo
* and game resume can follow, so don't panic yet and just
* relax and stop thinking so that we don't waste CPU. */
uct_t *u = (uct_t*)e->data;
uct_pondering_stop(u);
}
/* This is called on engine reset, especially when clear_board
* is received and new game should begin. */
static void
uct_done(engine_t *e)
{
uct_t *u = (uct_t*)e->data;
uct_pondering_stop(u);
if (u->t) reset_state(u);
if (u->dynkomi) u->dynkomi->done(u->dynkomi);
if (u->policy) u->policy->done(u->policy);
if (u->random_policy) u->random_policy->done(u->random_policy);
playout_policy_done(u->playout);
uct_prior_done(u->prior);
#ifdef PACHI_PLUGINS
pluginset_done(u->plugins);
#endif
}
/* Run time-limited MCTS search on foreground. */
static int
uct_search(uct_t *u, board_t *b, time_info_t *ti, enum stone color, tree_t *t, bool print_progress)
{
uct_search_state_t s;
uct_search_start(u, b, color, t, ti, &s, 0);
if (UDEBUGL(2) && s.base_playouts > 0)
fprintf(stderr, "<pre-simulated %d games>\n", s.base_playouts);
/* The search tree is ctx->t. This is currently == . It is important
* to reference ctx->t directly since the
* thread manager will swap the tree pointer asynchronously. */
/* Now, just periodically poll the search tree.
* Note that in case of TD_GAMES, threads will not wait for the
* uct_search_check_stop() signalization.
* TREE_BUSYWAIT_INTERVAL should never be less than desired time, or the
* time control is broken. But if it happens to be less, we still search
* at least 100ms otherwise the move is completely random. */
double interval = TREE_BUSYWAIT_INTERVAL;
bool interval_set = false;
while (1) {
uct_search_interval(u, &interval, &interval_set);
time_sleep(interval);
int i = uct_search_games(&s);
/* Print notifications etc. */
uct_search_progress(u, b, color, t, ti, &s, i);
if (s.fullmem && u->auto_alloc) {
/* Stop search, realloc tree and restart search */
if (uct_search_realloc_tree(u, b, color, ti, &s)) continue;
break;
}
/* Check if we should stop the search. */
if (uct_search_check_stop(u, b, color, t, ti, &s, i))
break;
}
uct_thread_ctx_t *ctx = uct_search_stop();
if (UDEBUGL(3)) {
tree_dump(t, u->dumpthres);
fprintf(stderr, "expanded nodes: %i\n", u->expanded_nodes);
}
if (UDEBUGL(2))
fprintf(stderr, "(avg score %f/%d; dynkomi's %f/%d value %f/%d)\n",
t->avg_score.value, t->avg_score.playouts,
u->dynkomi->score.value, u->dynkomi->score.playouts,
u->dynkomi->value.value, u->dynkomi->value.playouts);
if (print_progress)
uct_progress_status(u, t, b, color, 0, NULL);
if (u->debug_after.playouts > 0) {
/* Now, start an additional run of playouts, single threaded. */
time_info_t debug_ti;
debug_ti.type = TT_MOVE;
debug_ti.dim = TD_GAMES;
debug_ti.games = t->root->u.playouts + u->debug_after.playouts;
debug_ti.games_max = 0;
board_print_ownermap(b, stderr, &u->ownermap);
fprintf(stderr, "--8<-- UCT debug post-run begin (%d:%d) --8<--\n", u->debug_after.level, u->debug_after.playouts);
int debug_level_save = debug_level;
int u_debug_level_save = u->debug_level;
int p_debug_level_save = u->playout->debug_level;
debug_level = u->debug_after.level;
u->debug_level = u->debug_after.level;
u->playout->debug_level = u->debug_after.level;
uct_halt = false;
uct_playouts(u, b, color, t, &debug_ti);
tree_dump(t, u->dumpthres);
uct_halt = true;
debug_level = debug_level_save;
u->debug_level = u_debug_level_save;
u->playout->debug_level = p_debug_level_save;
fprintf(stderr, "--8<-- UCT debug post-run finished --8<--\n");
}
#ifdef DISTRIBUTED
u->played_own += ctx->games;
#endif
return ctx->games;
}
/* Dcnn pondering:
* Next move wasn't searched with dcnn priors, search will start from scratch
* so it gets dcnn evaluated (and next move as well). Save opponent best replies
* from search before tree gets reset, will need it later to guess next move.
* Call order must be:
* uct_genmove_pondering_save_replies()
* tree_promote_node()
* uct_genmove_pondering_start() */
static void
uct_genmove_pondering_save_replies(uct_t *u, board_t *b, enum stone color, coord_t next_move)
{
if (!(u->pondering_opt && using_dcnn(b))) return;
int nbest = u->dcnn_pondering_mcts;
coord_t *best_c = u->dcnn_pondering_mcts_c;
float best_r[nbest];
for (int i = 0; i < nbest; i++)
best_c[i] = pass;
if (!(u->t && color == stone_other(u->t->root_color))) return;
tree_node_t *best = tree_get_node(u->t->root, next_move);
if (!best) return;
uct_get_best_moves_at(u, best, best_c, best_r, nbest, false, 100);
}
/* Start pondering at the end of genmove.
* Must call uct_genmove_pondering_save_replies() before. */
static void
uct_genmove_pondering_start(uct_t *u, board_t *b, enum stone color, coord_t our_move)
{
enum stone other_color = stone_other(color);
if (!u->t) uct_prepare_move(u, b, other_color);
uct_pondering_start(u, b, u->t, other_color, our_move, UCT_SEARCH_GENMOVE_PONDERING | UCT_SEARCH_WANT_GC);
}
/* Start pondering in the background with @color to play.
* @our_move move to be added before starting. 0 means doesn't apply.
* @flags uct_search_start() flags for this search */
static void
uct_pondering_start(uct_t *u, board_t *b0, tree_t *t, enum stone color, coord_t our_move, int flags)
{
if (UDEBUGL(1))
fprintf(stderr, "Starting to ponder with color %s\n", stone2str(stone_other(color)));
flags |= UCT_SEARCH_PONDERING;
u->search_flags = flags;
/* We need a local board copy to ponder upon. */
board_t *b = malloc2(board_t); board_copy(b, b0);
/* Board needs updating ? (b0 did not have the genmove'd move played yet) */
if (our_move) { /* 0 never a real coord */
move_t m = move(our_move, stone_other(color));
int res = board_play(b, &m);
assert(res >= 0);
}
/* analyzing should be only case of switching color to play */
if (genmove_pondering(u)) assert(color == board_to_play(b));
setup_dynkomi(u, b, color);
/* Start MCTS manager thread "headless". */
static uct_search_state_t s;
uct_search_start(u, b, color, t, NULL, &s, flags);
}
/* uct_search_stop() frontend for the pondering (non-genmove) mode, and
* to stop the background search for a slave in the distributed engine. */
void
uct_pondering_stop(uct_t *u)
{
if (!thread_manager_running)
return;
/* Search active but not pondering actually ? Stop search.
* Distributed mode slaves need that, special case. */
if (!pondering(u)) { uct_search_stop(); return; }
/* Stop the thread manager. */
uct_thread_ctx_t *ctx = uct_search_stop(); /* clears search flags */
if (UDEBUGL(1)) uct_progress_status(u, ctx->t, ctx->b, ctx->color, 0, NULL);
free(ctx->b);
u->reporting = u->reporting_opt;
u->report_fh = stderr;
}
void
uct_genmove_setup(uct_t *u, board_t *b, enum stone color)
{
if (b->superko_violation) {
fprintf(stderr, "!!! WARNING: SUPERKO VIOLATION OCCURED BEFORE THIS MOVE\n");
fprintf(stderr, "Maybe you play with situational instead of positional superko?\n");
fprintf(stderr, "I'm going to ignore the violation, but note that I may miss\n");
fprintf(stderr, "some moves valid under this ruleset because of this.\n");
b->superko_violation = false;
}
uct_prepare_move(u, b, color);
assert(u->t);
u->my_color = color;
/* How to decide whether to use dynkomi in this game? Since we use
* pondering, it's not simple "who-to-play" matter. Decide based on
* the last genmove issued. */
u->t->use_extra_komi = !!(u->dynkomi_mask & color);
setup_dynkomi(u, b, color);
}
static tree_node_t *
genmove(engine_t *e, board_t *b, time_info_t *ti, enum stone color, bool pass_all_alive, coord_t *best_coord)
{
uct_t *u = (uct_t*)e->data;
double time_start = time_now();
u->pass_all_alive |= pass_all_alive;
u->mcts_time = 0;
u->expanded_nodes = 0;
uct_pondering_stop(u);
if (u->t) {
bool unexpected_color = (color != board_to_play(b)); /* playing twice in a row ?? */
bool missing_dcnn_priors = (using_dcnn(b) && !(u->t->root->hints & TREE_HINT_DCNN));
if (u->genmove_reset_tree || u->t->untrustworthy_tree ||
unexpected_color || missing_dcnn_priors) {
u->initial_extra_komi = u->t->extra_komi;
reset_state(u);
}
}
uct_genmove_setup(u, b, color);
/* Start the Monte Carlo Tree Search! */
int base_playouts = u->t->root->u.playouts;
int played_games = uct_search(u, b, ti, color, u->t, false);
tree_node_t *best;
best = uct_search_result(u, b, color, u->pass_all_alive, played_games, base_playouts, best_coord);
if (UDEBUGL(2)) {
double total_time = time_now() - time_start;
double mcts_time = u->mcts_time + 0.000001; /* avoid divide by zero */
fprintf(stderr, "genmove in %0.2fs, mcts %0.2fs (%d games/s, %d games/s/thread)\n",
total_time, mcts_time, (int)(played_games/mcts_time), (int)(played_games/mcts_time/u->threads));
}
uct_progress_status(u, u->t, b, color, 0, best_coord);
#ifdef JOSEKIFIX
/* Check joseki override */
if (best) {
coord_t c = joseki_override_no_external_engine(b, &u->prev_ownermap, uct_ownermap(e, b));
if (!is_pass(c)) {
*best_coord = c;
best = tree_get_node(u->t->root, c);
assert(best);
}
}
/* Save ownermap */
u->prev_ownermap = u->ownermap;
#endif
return best;
}
static coord_t
uct_genmove(engine_t *e, board_t *b, time_info_t *ti, enum stone color, bool pass_all_alive)
{
uct_t *u = (uct_t*)e->data;
coord_t best;
tree_node_t *best_node = genmove(e, b, ti, color, pass_all_alive, &best);
/* Pass or resign.
* After a pass, pondering is harmful for two reasons:
* (i) We might keep pondering even when the game is over.
* Of course this is the case for opponent resign as well.
* (ii) More importantly, the ownermap will get skewed since
* the UCT will start cutting off any playouts. */
if (is_pass(best) || is_resign(best)) {
if (is_pass(best))
u->initial_extra_komi = u->t->extra_komi;
reset_state(u);
return best;
}
/* Save best replies before resetting tree (dcnn pondering). */
if (u->pondering_opt)
uct_genmove_pondering_save_replies(u, b, color, best);
/* Promote node or throw away tree as needed.
* Reset now if we don't reuse tree, avoids unnecessary tree gc. */
if (!reusing_tree(u, b) ||
!tree_promote_node(u->t, best_node, b, NULL)) {
/* Preserve dynamic komi information though, that is important. */
u->initial_extra_komi = u->t->extra_komi;
reset_state(u);
}
if (u->pondering_opt)
uct_genmove_pondering_start(u, b, color, best);
return best;
}
/* lz-genmove_analyze */
static coord_t
uct_genmove_analyze(engine_t *e, board_t *b, time_info_t *ti, enum stone color, bool pass_all_alive)
{
uct_t *u = (uct_t*)e->data;
uct_pondering_stop(u); /* Don't clobber report_fh later on... */
u->reporting = UR_LEELA_ZERO;
u->report_fh = stdout;
coord_t coord = uct_genmove(e, b, ti, color, pass_all_alive);
u->reporting = u->reporting_opt;
u->report_fh = stderr;
return coord;
}
/* Start tree search in the background and output stats for the sake of
* frontend running Pachi: sortof like pondering without a genmove.
* Stop processing if @start is 0. */
static void
uct_analyze(engine_t *e, board_t *b, enum stone color, int start)
{
uct_t *u = (uct_t*)e->data;
int flags = (pondering(u) ? u->search_flags : 0);
bool genmove_pondering = genmove_pondering(u);
if (!start) {
if (pondering(u)) uct_pondering_stop(u); /* clears flags ! */
if (genmove_pondering) /* pondering + analyzing ? resume normal pondering */
uct_pondering_start(u, b, u->t, board_to_play(b), 0, flags);
return;
}
/* If pondering already restart, situation/parameters may have changed.
* For example frequency change or getting analyze cmd while pondering. */
if (pondering(u))
uct_pondering_stop(u);
if (u->t) {
bool missing_dcnn_priors = (using_dcnn(b) && !(u->t->root->hints & TREE_HINT_DCNN));
bool switching_color_to_play = (color != stone_other(u->t->root_color));
if (missing_dcnn_priors || switching_color_to_play)
reset_state(u);
}
u->reporting = UR_LEELA_ZERO;
u->report_fh = stdout; /* Reset in uct_pondering_stop() */
if (!u->t) uct_prepare_move(u, b, color);
uct_pondering_start(u, b, u->t, color, 0, flags);
}
/* Same as uct_get_best_moves() for node @parent.
* XXX pass can be a valid move in which case you need best_n to check.
* have another function which exposes best_n ? */
void
uct_get_best_moves_at(uct_t *u, tree_node_t *parent, coord_t *best_c, float *best_r, int nbest,
bool winrates, int min_playouts)
{
tree_node_t* best_n[nbest];
for (int i = 0; i < nbest; i++) {
best_c[i] = pass; best_r[i] = 0; best_n[i] = NULL;
}
/* Find best moves */
for (tree_node_t *n = parent->children; n; n = n->sibling)
if (n->u.playouts >= min_playouts)
best_moves_add_full(node_coord(n), n->u.playouts, n, best_c, best_r, (void**)best_n, nbest);
if (winrates) /* Get winrates */
for (int i = 0; i < nbest && best_n[i]; i++)
best_r[i] = tree_node_get_value(u->t, 1, best_n[i]->u.value);
}
/* Get best moves with at least @min_playouts.
* If @winrates is true @best_r returns winrates instead of visits.
* (moves remain in best-visited order) */
void
uct_get_best_moves(uct_t *u, coord_t *best_c, float *best_r, int nbest, bool winrates, int min_playouts)
{
uct_get_best_moves_at(u, u->t->root, best_c, best_r, nbest, winrates, min_playouts);
}
/* Kindof like uct_genmove() but find the best candidates */
static void
uct_best_moves(engine_t *e, board_t *b, time_info_t *ti, enum stone color,
coord_t *best_c, float *best_r, int nbest)
{
uct_t *u = (uct_t*)e->data;
uct_pondering_stop(u);
if (u->t)
reset_state(u);
coord_t best_coord;
genmove(e, b, ti, color, 0, &best_coord);
uct_get_best_moves(u, best_c, best_r, nbest, true, 100);
if (u->t)
reset_state(u);
}
bool
uct_gentbook(engine_t *e, board_t *b, time_info_t *ti, enum stone color)
{
uct_t *u = (uct_t*)e->data;
if (!u->t) uct_prepare_move(u, b, color);
assert(u->t);
if (ti->dim == TD_GAMES) {
/* Don't count in games that already went into the tbook. */
ti->games += u->t->root->u.playouts;
}
uct_search(u, b, ti, color, u->t, true);
assert(ti->dim == TD_GAMES);
tree_save(u->t, b, ti->games / 100);
return true;
}
void
uct_dumptbook(engine_t *e, board_t *b, enum stone color)
{
uct_t *u = (uct_t*)e->data;
size_t size = u->tree_size;
tree_t *t = tree_init(color, size, 0);
tree_load(t, b);
tree_dump(t, 0);
tree_done(t);
}
floating_t
uct_evaluate_one(engine_t *e, board_t *b, time_info_t *ti, coord_t c, enum stone color)
{
uct_t *u = (uct_t*)e->data;
board_t b2;
board_copy(&b2, b);
move_t m = { c, color };
int res = board_play(&b2, &m);
if (res < 0)
return NAN;
color = stone_other(color);
if (u->t) reset_state(u);
uct_genmove_setup(u, &b2, color);
assert(u->t);
floating_t bestval;
uct_search(u, &b2, ti, color, u->t, true);
tree_node_t *best = u->policy->choose(u->policy, u->t->root, &b2, color, resign);
if (!best) {
bestval = NAN; // the opponent has no reply!
} else {
bestval = tree_node_get_value(u->t, 1, best->u.value);
}
reset_state(u); // clean our junk
return isnan(bestval) ? NAN : 1.0f - bestval;
}
void
uct_evaluate(engine_t *e, board_t *b, time_info_t *ti, floating_t *vals, enum stone color)
{
for (int i = 0; i < b->flen; i++) {
if (is_pass(b->f[i]))
vals[i] = NAN;
else
vals[i] = uct_evaluate_one(e, b, ti, b->f[i], color);
}
}
static void
log_nthreads(uct_t *u)
{
static int logged = 0;
if (DEBUGL(0) && !logged++) fprintf(stderr, "Threads: %i\n", u->threads);
}
size_t
uct_default_tree_size()
{
/* Double it on 64-bit, tree takes up twice as much memory ... */
int mult = (sizeof(void*) == 4 ? 1 : 2);
/* Default tree size can be small now that it grows as needed. */
return (size_t)100 * mult * 1048576;
}
/* Set current tree size to use taking memory limits into account */
void
uct_tree_size_init(uct_t *u, size_t tree_size)
{
size_t max_tree_size = (u->max_tree_size_opt ? u->max_tree_size_opt : (size_t)-1);
size_t max_mem = (u->max_mem ? u->max_mem : (size_t)-1);
/* fixed_mem: can use either "tree_size" or "max_tree_size"
* to set amount of memory to allocate.
* before auto_alloc there was only max_tree_size so you
* can "fixed_mem,max_tree_size=..." to get old behavior. */
if (!u->auto_alloc && u->max_tree_size_opt)
tree_size = u->max_tree_size_opt;
/* Honor memory limits */
if (tree_size > max_tree_size) tree_size = max_tree_size;
if (tree_size > max_mem) tree_size = max_mem;
u->tree_size = tree_size;
}
#define NEED_RESET ENGINE_SETOPTION_NEED_RESET
#define option_error engine_setoption_error
static bool
uct_setoption(engine_t *e, board_t *b, const char *optname, char *optval,
char **err, bool setup, bool *reset)
{
static_strbuf(ebuf, 256);
uct_t *u = (uct_t*)e->data;
/** Basic options */
if (!strcasecmp(optname, "debug")) {
if (optval) u->debug_level = atoi(optval);
else u->debug_level++;
}
else if (!strcasecmp(optname, "reporting") && optval) {
/* The format of output for detailed progress
* information (such as current best move and
* its value, etc.). */
if (!strcasecmp(optval, "text")) {
/* Plaintext traditional output. */
u->reporting = UR_TEXT;
} else if (!strcasecmp(optval, "json")) {