diff --git a/open_spiel/games/german_whist_foregame/german_whist_endgame.cc b/open_spiel/games/german_whist_foregame/german_whist_endgame.cc index 1ec4be3a3c..b6a5660873 100644 --- a/open_spiel/games/german_whist_foregame/german_whist_endgame.cc +++ b/open_spiel/games/german_whist_foregame/german_whist_endgame.cc @@ -1,704 +1,729 @@ -//Source Code for an Executable Generating an Endgame Tablebase for German Whist - +// Source Code for an Executable Generating an Endgame Tablebase for German +// Whist #include #include + #include "open_spiel/games/german_whist_foregame/german_whist_foregame.h" - -//#define DEBUG -namespace open_spiel{ -namespace german_whist_foregame{ + +// #define DEBUG +namespace open_spiel { +namespace german_whist_foregame { struct Pair { - char index; - char value; - Pair(char index_, char value_) { - index = index_; - value = value_; - } - bool operator<(const Pair &pair) const { - return value < pair.value; - } + char index; + char value; + Pair(char index_, char value_) { + index = index_; + value = value_; + } + bool operator<(const Pair& pair) const { return value < pair.value; } }; -struct ActionStruct{ - uint32_t index; - unsigned char suit; - bool player; - ActionStruct(uint32_t index_, unsigned char suit_, bool player_) { - index = index_; - suit = suit_; - player = player_; - } +struct ActionStruct { + uint32_t index; + unsigned char suit; + bool player; + ActionStruct(uint32_t index_, unsigned char suit_, bool player_) { + index = index_; + suit = suit_; + player = player_; + } }; struct ActionValue { - ActionStruct action; - int value; - bool operator<(const ActionValue& aval) const { - return value < aval.value; - } + ActionStruct action; + int value; + bool operator<(const ActionValue& aval) const { return value < aval.value; } }; class Node { -private: - uint32_t cards_; - std::array suit_masks_; - char total_tricks_; - char trump_; - char score_; - char moves_; - bool player_; - std::vector history_; - uint64_t key_; -public: - Node(uint32_t cards, std::array suit_masks, char trump,bool player) { - cards_ = cards; - suit_masks_ = suit_masks; - total_tricks_ = popcnt_u32(cards); - trump_ = trump; - moves_ = 0; - player_ = player; - score_ = 0; - history_ = {}; - }; - bool Player() { return player_; }; - char Score() { return score_; }; - char Moves() { return moves_; }; - bool IsTerminal() { - return (moves_ == 2 * total_tricks_); - } - char RemainingTricks() { - return (char)(total_tricks_-(moves_>>1)); - } - char TotalTricks() { - return total_tricks_; - } - uint32_t Cards() { return cards_; } - std::array SuitMasks() { return suit_masks_; } - uint64_t GetNodeKey() { return key_; } - bool Trick(ActionStruct lead, ActionStruct follow) { - //true if leader won// - return (lead.suit != follow.suit && lead.suit == trump_) || (lead.suit == follow.suit && lead.index <= follow.index); - } - - void RemoveCard(ActionStruct action) { - //Removes card from cards_// - uint32_t mask_b = ~0; - mask_b =bzhi_u32(mask_b, action.index); - uint32_t mask_a = ~mask_b; - mask_a = blsr_u32(mask_a); - uint32_t copy_a = cards_ & mask_a; - uint32_t copy_b = cards_ & mask_b; - copy_a = copy_a >> 1; - cards_ = copy_a | copy_b; - //decrements appropriate suits// - suit_masks_[action.suit] = blsr_u32(suit_masks_[action.suit])>>1; - char suit = action.suit; - suit++; - while (suit < kNumSuits) { - suit_masks_[suit]=suit_masks_[suit] >> 1; - suit++; - } - } - void InsertCard(ActionStruct action) { - //inserts card into cards_// - uint32_t mask_b = ~0; - mask_b = bzhi_u32(mask_b, action.index); - uint32_t mask_a = ~mask_b; - uint32_t copy_b = cards_ & mask_b; - uint32_t copy_a = cards_ & mask_a; - copy_a = copy_a << 1; - uint32_t card = action.player<< action.index; - cards_ = card | copy_a | copy_b; - //increments appropriate suits// - uint32_t new_suit = (suit_masks_[action.suit] & mask_b )| (1 << action.index); - suit_masks_[action.suit] = ((suit_masks_[action.suit] & mask_a) << 1 )| new_suit; - char suit = action.suit; - suit++; - while (suit < kNumSuits) { - suit_masks_[suit] = suit_masks_[suit] << 1; - suit++; - } - } - void UpdateNodeKey() { - //recasts the cards and suitlengths into quasi-canonical form// - //least sig part of 32bit card is trump, then suits in ascending length// - - //note this canonical form does not take advantage of all isomorphisms// - //suppose a game is transformed as follows: all card bits flipped and the player bit flipped, ie player 1 has the lead and has player 0s cards from the original game// - //this implies player 1 achieves the minimax value of the original game ie the value is remaining tricks - value of the original game for this transformed game// - //also does not take advantage of single suit isomorphism. Namely all single suit games with the same card distribution are isomorphic. Currently this considers all trump, all no trump games as distinct// - uint64_t suit_sig = 0; - char trump_length = popcnt_u32(suit_masks_[trump_]); - if (trump_length > kNumRanks) { - throw; - } - std::vector non_trump_lengths; - for (char i = 0; i < kNumSuits; ++i) { - if (i != trump_) { - char length = popcnt_u32(suit_masks_[i]); - uint32_t sig = suit_masks_[i]&cards_; - if (suit_masks_[i] != 0) { - sig = (sig >> (tzcnt_u32(suit_masks_[i]))); - } - if (length > kNumRanks) { - throw 1; - } - non_trump_lengths.push_back(Triple{i,length,sig }); - } - } - //sorting takes advantage of two isomorphisms namely nontrump suits of nonequal length can be exchanged and the value of the game does not change// - //and this more complicated suppose two games with two or more (non_trump)suits of equal length, permuting those suits should not change the value of solved game ie it is an isomorphism// - std::sort(non_trump_lengths.begin(), non_trump_lengths.end()); - suit_sig = suit_sig | trump_length; - for (size_t i = 0; i < non_trump_lengths.size(); ++i) { - suit_sig = suit_sig | ((uint64_t)non_trump_lengths[i].length << (4*(i+1))); - } - suit_sig = suit_sig << 32; - std::array suit_cards; - suit_cards[0] = cards_ & suit_masks_[trump_]; - if (suit_masks_[trump_] != 0) { - suit_cards[0] = suit_cards[0] >> tzcnt_u32(suit_masks_[trump_]); - } - uint32_t sum = popcnt_u32(suit_masks_[trump_]); - uint32_t cards = 0|suit_cards[0]; - for (size_t i = 0; i < non_trump_lengths.size(); ++i) { - suit_cards[i] = cards_ & suit_masks_[non_trump_lengths[i].index]; - uint32_t val = 0; - if (suit_masks_[non_trump_lengths[i].index] != 0) { - val = tzcnt_u32(suit_masks_[non_trump_lengths[i].index]); - } - suit_cards[i]= suit_cards[i] >>val; - suit_cards[i] = suit_cards[i] << sum; - sum += popcnt_u32(suit_masks_[non_trump_lengths[i].index]); - cards = cards | suit_cards[i]; - } - //cards = cards | (player_ << 31); - key_ = suit_sig | (uint64_t)cards; + private: + uint32_t cards_; + std::array suit_masks_; + char total_tricks_; + char trump_; + char score_; + char moves_; + bool player_; + std::vector history_; + uint64_t key_; + + public: + Node(uint32_t cards, std::array suit_masks, char trump, + bool player) { + cards_ = cards; + suit_masks_ = suit_masks; + total_tricks_ = popcnt_u32(cards); + trump_ = trump; + moves_ = 0; + player_ = player; + score_ = 0; + history_ = {}; + }; + bool Player() { return player_; }; + char Score() { return score_; }; + char Moves() { return moves_; }; + bool IsTerminal() { return (moves_ == 2 * total_tricks_); } + char RemainingTricks() { return (char)(total_tricks_ - (moves_ >> 1)); } + char TotalTricks() { return total_tricks_; } + uint32_t Cards() { return cards_; } + std::array SuitMasks() { return suit_masks_; } + uint64_t GetNodeKey() { return key_; } + bool Trick(ActionStruct lead, ActionStruct follow) { + // true if leader won// + return (lead.suit != follow.suit && lead.suit == trump_) || + (lead.suit == follow.suit && lead.index <= follow.index); + } + + void RemoveCard(ActionStruct action) { + // Removes card from cards_// + uint32_t mask_b = ~0; + mask_b = bzhi_u32(mask_b, action.index); + uint32_t mask_a = ~mask_b; + mask_a = blsr_u32(mask_a); + uint32_t copy_a = cards_ & mask_a; + uint32_t copy_b = cards_ & mask_b; + copy_a = copy_a >> 1; + cards_ = copy_a | copy_b; + // decrements appropriate suits// + suit_masks_[action.suit] = blsr_u32(suit_masks_[action.suit]) >> 1; + char suit = action.suit; + suit++; + while (suit < kNumSuits) { + suit_masks_[suit] = suit_masks_[suit] >> 1; + suit++; + } + } + void InsertCard(ActionStruct action) { + // inserts card into cards_// + uint32_t mask_b = ~0; + mask_b = bzhi_u32(mask_b, action.index); + uint32_t mask_a = ~mask_b; + uint32_t copy_b = cards_ & mask_b; + uint32_t copy_a = cards_ & mask_a; + copy_a = copy_a << 1; + uint32_t card = action.player << action.index; + cards_ = card | copy_a | copy_b; + // increments appropriate suits// + uint32_t new_suit = + (suit_masks_[action.suit] & mask_b) | (1 << action.index); + suit_masks_[action.suit] = + ((suit_masks_[action.suit] & mask_a) << 1) | new_suit; + char suit = action.suit; + suit++; + while (suit < kNumSuits) { + suit_masks_[suit] = suit_masks_[suit] << 1; + suit++; + } + } + void UpdateNodeKey() { + // recasts the cards and suitlengths into quasi-canonical form// + // least sig part of 32bit card is trump, then suits in ascending length// + + // note this canonical form does not take advantage of all isomorphisms// + // suppose a game is transformed as follows: all card bits flipped and the + // player bit flipped, ie player 1 has the lead and has player 0s cards from + // the original game// this implies player 1 achieves the minimax value of + // the original game ie the value is remaining tricks - value of the + // original game for this transformed game// also does not take advantage of + // single suit isomorphism. Namely all single suit games with the same card + // distribution are isomorphic. Currently this considers all trump, all no + // trump games as distinct// + uint64_t suit_sig = 0; + char trump_length = popcnt_u32(suit_masks_[trump_]); + if (trump_length > kNumRanks) { + throw; + } + std::vector non_trump_lengths; + for (char i = 0; i < kNumSuits; ++i) { + if (i != trump_) { + char length = popcnt_u32(suit_masks_[i]); + uint32_t sig = suit_masks_[i] & cards_; + if (suit_masks_[i] != 0) { + sig = (sig >> (tzcnt_u32(suit_masks_[i]))); + } + if (length > kNumRanks) { + throw 1; + } + non_trump_lengths.push_back(Triple{i, length, sig}); + } + } + // sorting takes advantage of two isomorphisms namely nontrump suits of + // nonequal length can be exchanged and the value of the game does not + // change// and this more complicated suppose two games with two or more + // (non_trump)suits of equal length, permuting those suits should not change + // the value of solved game ie it is an isomorphism// + std::sort(non_trump_lengths.begin(), non_trump_lengths.end()); + suit_sig = suit_sig | trump_length; + for (size_t i = 0; i < non_trump_lengths.size(); ++i) { + suit_sig = + suit_sig | ((uint64_t)non_trump_lengths[i].length << (4 * (i + 1))); + } + suit_sig = suit_sig << 32; + std::array suit_cards; + suit_cards[0] = cards_ & suit_masks_[trump_]; + if (suit_masks_[trump_] != 0) { + suit_cards[0] = suit_cards[0] >> tzcnt_u32(suit_masks_[trump_]); + } + uint32_t sum = popcnt_u32(suit_masks_[trump_]); + uint32_t cards = 0 | suit_cards[0]; + for (size_t i = 0; i < non_trump_lengths.size(); ++i) { + suit_cards[i] = cards_ & suit_masks_[non_trump_lengths[i].index]; + uint32_t val = 0; + if (suit_masks_[non_trump_lengths[i].index] != 0) { + val = tzcnt_u32(suit_masks_[non_trump_lengths[i].index]); + } + suit_cards[i] = suit_cards[i] >> val; + suit_cards[i] = suit_cards[i] << sum; + sum += popcnt_u32(suit_masks_[non_trump_lengths[i].index]); + cards = cards | suit_cards[i]; + } + // cards = cards | (player_ << 31); + key_ = suit_sig | (uint64_t)cards; #ifdef DEBUG_KEY - std::cout <<"CARDS_ " << cards_ << std::endl; - std::cout << "CARDS " << cards << std::endl; - std::cout << "SUIT MASKS " << std::endl; - for (int i = 0; i < kNumSuits; ++i) { - std::cout << suit_masks_[i] << std::endl; - } - std::cout << "SUIT_SIG " << suit_sig << std::endl; - std::cout<<"KEY " << key_ << std::endl; + std::cout << "CARDS_ " << cards_ << std::endl; + std::cout << "CARDS " << cards << std::endl; + std::cout << "SUIT MASKS " << std::endl; + for (int i = 0; i < kNumSuits; ++i) { + std::cout << suit_masks_[i] << std::endl; + } + std::cout << "SUIT_SIG " << suit_sig << std::endl; + std::cout << "KEY " << key_ << std::endl; #endif - } - uint64_t AltKey() { - uint32_t mask = bzhi_u32(~0, 2 * RemainingTricks()); - return key_ ^ (uint64_t)mask; - } - //Move Ordering Heuristics// - //These could Definitely be improved, very hacky// - int LeadOrdering(ActionStruct action) { - char suit = action.suit; - uint32_t copy_cards = cards_; - if (player_ == 0) { - copy_cards = ~copy_cards; - } - uint32_t suit_cards = copy_cards & suit_masks_[suit]; - uint32_t mask = suit_cards & ~(suit_cards >> 1); - //represents out of the stategically inequivalent cards in a suit that a player holds, what rank is it, rank 0 is highest rank etc// - int suit_rank = popcnt_u32(bzhi_u32(mask, action.index)); - ApplyAction(action); - std::vector moves = LegalActions(); - UndoAction(action); - int sum = 0; - for (size_t i = 0; i < moves.size(); ++i) { - sum += Trick(action, moves[i]); - } - if (sum == moves.size()) { - return action.suit == trump_ ? 0 - suit_rank : -1 * kNumRanks - suit_rank;//intriguing this seems to produce small perfomance increase// - } - if (sum == 0) { - return 2 * kNumRanks - suit_rank; - } - else { - return 1 * kNumRanks - suit_rank; - } - } - int FollowOrdering(ActionStruct action) { - ActionStruct lead = history_.back(); - //follow ordering for fast cut offs// - //win as cheaply as possible, followed by lose as cheaply as possible - char suit = action.suit; - uint32_t copy_cards = cards_; - if (player_ == 0) { - copy_cards = ~copy_cards; - } - uint32_t suit_cards = copy_cards & suit_masks_[suit]; - uint32_t mask = suit_cards & ~(suit_cards >> 1); - //represents out of the stategically inequivalent cards in a suit that a player holds, what rank is it, rank 0 is highest rank etc// - int suit_rank = popcnt_u32(bzhi_u32(mask, action.index)); - if (!Trick(lead, action)) { - return -kNumRanks - suit_rank; - } - else { - return -suit_rank; - } - } - - - - std::vector LegalActions() { - //Features// - //Move fusion// - std::vector out; - out.reserve(kNumRanks); - uint32_t copy_cards = cards_; - std::array player_suit_masks; - if (player_ == 0) { - copy_cards = ~copy_cards; - } - for (size_t i = 0; i < kNumSuits; ++i) { - uint32_t suit_cards = copy_cards & suit_masks_[i]; - player_suit_masks[i] = suit_cards & ~(suit_cards >> 1); + } + uint64_t AltKey() { + uint32_t mask = bzhi_u32(~0, 2 * RemainingTricks()); + return key_ ^ (uint64_t)mask; + } + // Move Ordering Heuristics// + // These could Definitely be improved, very hacky// + int LeadOrdering(ActionStruct action) { + char suit = action.suit; + uint32_t copy_cards = cards_; + if (player_ == 0) { + copy_cards = ~copy_cards; + } + uint32_t suit_cards = copy_cards & suit_masks_[suit]; + uint32_t mask = suit_cards & ~(suit_cards >> 1); + // represents out of the stategically inequivalent cards in a suit that a + // player holds, what rank is it, rank 0 is highest rank etc// + int suit_rank = popcnt_u32(bzhi_u32(mask, action.index)); + ApplyAction(action); + std::vector moves = LegalActions(); + UndoAction(action); + int sum = 0; + for (size_t i = 0; i < moves.size(); ++i) { + sum += Trick(action, moves[i]); + } + if (sum == moves.size()) { + return action.suit == trump_ + ? 0 - suit_rank + : -1 * kNumRanks - + suit_rank; // intriguing this seems to produce small + // perfomance increase// + } + if (sum == 0) { + return 2 * kNumRanks - suit_rank; + } else { + return 1 * kNumRanks - suit_rank; + } + } + int FollowOrdering(ActionStruct action) { + ActionStruct lead = history_.back(); + // follow ordering for fast cut offs// + // win as cheaply as possible, followed by lose as cheaply as possible + char suit = action.suit; + uint32_t copy_cards = cards_; + if (player_ == 0) { + copy_cards = ~copy_cards; + } + uint32_t suit_cards = copy_cards & suit_masks_[suit]; + uint32_t mask = suit_cards & ~(suit_cards >> 1); + // represents out of the stategically inequivalent cards in a suit that a + // player holds, what rank is it, rank 0 is highest rank etc// + int suit_rank = popcnt_u32(bzhi_u32(mask, action.index)); + if (!Trick(lead, action)) { + return -kNumRanks - suit_rank; + } else { + return -suit_rank; + } + } + + std::vector LegalActions() { + // Features// + // Move fusion// + std::vector out; + out.reserve(kNumRanks); + uint32_t copy_cards = cards_; + std::array player_suit_masks; + if (player_ == 0) { + copy_cards = ~copy_cards; + } + for (size_t i = 0; i < kNumSuits; ++i) { + uint32_t suit_cards = copy_cards & suit_masks_[i]; + player_suit_masks[i] = suit_cards & ~(suit_cards >> 1); #ifdef DEBUG - std::cout << "Cards " << cards_ << std::endl; - std::cout << "Suit Mask " << i << " " << suit_masks_[i] << std::endl; - std::cout << "Player " << player_ << " suit mask " << (int)i << " " << player_suit_masks[i] << std::endl; + std::cout << "Cards " << cards_ << std::endl; + std::cout << "Suit Mask " << i << " " << suit_masks_[i] << std::endl; + std::cout << "Player " << player_ << " suit mask " << (int)i << " " + << player_suit_masks[i] << std::endl; #endif - } - for (char i = 0; i < kNumSuits; ++i) { - uint32_t suit_mask = player_suit_masks[i]; - bool lead = (moves_ % 2 == 0); - bool follow = (moves_ % 2 == 1); - bool correct_suit = 0; - bool void_in_suit = 0; - if (follow == true) { - correct_suit = (history_.back().suit == i); - void_in_suit = (player_suit_masks[history_.back().suit] == 0); - } - if ((lead || (follow && (correct_suit || void_in_suit)))) { - while (suit_mask != 0) { - uint32_t best = tzcnt_u32(suit_mask); - out.push_back(ActionStruct(best,i,player_)); - suit_mask = blsr_u32(suit_mask); - } - } - } + } + for (char i = 0; i < kNumSuits; ++i) { + uint32_t suit_mask = player_suit_masks[i]; + bool lead = (moves_ % 2 == 0); + bool follow = (moves_ % 2 == 1); + bool correct_suit = 0; + bool void_in_suit = 0; + if (follow == true) { + correct_suit = (history_.back().suit == i); + void_in_suit = (player_suit_masks[history_.back().suit] == 0); + } + if ((lead || (follow && (correct_suit || void_in_suit)))) { + while (suit_mask != 0) { + uint32_t best = tzcnt_u32(suit_mask); + out.push_back(ActionStruct(best, i, player_)); + suit_mask = blsr_u32(suit_mask); + } + } + } #ifdef DEBUG - std::cout << "Player " << player_ << " MoveGen " << std::endl; - for (size_t i = 0; i < out.size(); ++i) { - std::cout << out[i].index << " " << (int)out[i].suit << std::endl; - } -#endif - return out; + std::cout << "Player " << player_ << " MoveGen " << std::endl; + for (size_t i = 0; i < out.size(); ++i) { + std::cout << out[i].index << " " << (int)out[i].suit << std::endl; } - void ApplyAction(ActionStruct action) { +#endif + return out; + } + void ApplyAction(ActionStruct action) { #ifdef DEBUG - std::cout << "Player " << player_ << " ApplyAction " << action.index << " " << (int)action.suit << std::endl; + std::cout << "Player " << player_ << " ApplyAction " << action.index << " " + << (int)action.suit << std::endl; #endif - if (moves_ % 2 == 1) { - ActionStruct lead = history_.back(); - bool winner = !((Trick(lead, action)) ^ lead.player); + if (moves_ % 2 == 1) { + ActionStruct lead = history_.back(); + bool winner = !((Trick(lead, action)) ^ lead.player); #ifdef DEBUG - std::cout << "Player " << winner << " won this trick" << std::endl; + std::cout << "Player " << winner << " won this trick" << std::endl; #endif - score_ += (winner == 0); - player_ = (winner); - } - else { - player_ = !player_; - } + score_ += (winner == 0); + player_ = (winner); + } else { + player_ = !player_; + } #ifdef DEBUG - assert((suit_masks_[0] & suit_masks_[1]) == 0); - assert((suit_masks_[0] & suit_masks_[2])== 0); - assert((suit_masks_[0] & suit_masks_[3]) == 0); - assert((suit_masks_[1] & suit_masks_[2]) == 0); - assert((suit_masks_[1] & suit_masks_[3]) == 0); - assert((suit_masks_[2] & suit_masks_[3]) == 0); + assert((suit_masks_[0] & suit_masks_[1]) == 0); + assert((suit_masks_[0] & suit_masks_[2]) == 0); + assert((suit_masks_[0] & suit_masks_[3]) == 0); + assert((suit_masks_[1] & suit_masks_[2]) == 0); + assert((suit_masks_[1] & suit_masks_[3]) == 0); + assert((suit_masks_[2] & suit_masks_[3]) == 0); #endif - RemoveCard(action); - moves_++; - history_.push_back(action); - } - void UndoAction(ActionStruct action) { - if (moves_ % 2 == 0) { - ActionStruct lead = history_[history_.size() - 2]; - ActionStruct follow = history_[history_.size() - 1]; - bool winner = !(Trick(lead, follow) ^ lead.player); - score_ -= (winner == 0); - } - InsertCard(action); - moves_--; - player_=history_.back().player; - history_.pop_back(); + RemoveCard(action); + moves_++; + history_.push_back(action); + } + void UndoAction(ActionStruct action) { + if (moves_ % 2 == 0) { + ActionStruct lead = history_[history_.size() - 2]; + ActionStruct follow = history_[history_.size() - 1]; + bool winner = !(Trick(lead, follow) ^ lead.player); + score_ -= (winner == 0); + } + InsertCard(action); + moves_--; + player_ = history_.back().player; + history_.pop_back(); #ifdef DEBUG - std::cout << "Player " << player_ << " UndoAction " << action.index << " " << (int)action.suit << std::endl; + std::cout << "Player " << player_ << " UndoAction " << action.index << " " + << (int)action.suit << std::endl; #endif - } + } }; - - -//solvers below +// solvers below int AlphaBeta(Node* node, int alpha, int beta) { - //fail soft ab search// - //uses move ordering to speed up search// - if (node->IsTerminal()) { - return node->Score(); - } - //move ordering code// - std::vector actions = node->LegalActions(); - std::vector temp; - temp.reserve(kNumRanks); - for(int i =0;iMoves()%2==0){ - temp.push_back({actions[i],node->LeadOrdering(actions[i])}); - } - else{ - temp.push_back({actions[i],node->FollowOrdering(actions[i])}); - } + // fail soft ab search// + // uses move ordering to speed up search// + if (node->IsTerminal()) { + return node->Score(); + } + // move ordering code// + std::vector actions = node->LegalActions(); + std::vector temp; + temp.reserve(kNumRanks); + for (int i = 0; i < actions.size(); ++i) { + if (node->Moves() % 2 == 0) { + temp.push_back({actions[i], node->LeadOrdering(actions[i])}); + } else { + temp.push_back({actions[i], node->FollowOrdering(actions[i])}); + } + } + std::sort(temp.begin(), temp.end()); + for (int i = 0; i < temp.size(); ++i) { + actions[i] = temp[i].action; + } + // alpha beta search// + if (node->Player() == 0) { + int val = 0; + for (int i = 0; i < actions.size(); ++i) { + node->ApplyAction(actions[i]); + val = std::max(val, AlphaBeta(node, alpha, beta)); + node->UndoAction(actions[i]); + alpha = std::max(val, alpha); + if (val >= beta) { + break; + } } - std::sort(temp.begin(),temp.end()); - for(int i=0;iPlayer() == 0) { - int val = 0; - for (int i = 0; i < actions.size(); ++i) { - node->ApplyAction(actions[i]); - val = std::max(val, AlphaBeta(node, alpha, beta)); - node->UndoAction(actions[i]); - alpha = std::max(val, alpha); - if (val >= beta) { - break; - } - } - return val; - } - else if (node->Player() == 1) { - int val =node->TotalTricks(); - for (int i = 0; i < actions.size(); ++i) { - node->ApplyAction(actions[i]); - val = std::min(val, AlphaBeta(node, alpha, beta)); - node->UndoAction(actions[i]); - beta = std::min(val, beta); - if (val <= alpha) { - break; - } - } - return val; + return val; + } else if (node->Player() == 1) { + int val = node->TotalTricks(); + for (int i = 0; i < actions.size(); ++i) { + node->ApplyAction(actions[i]); + val = std::min(val, AlphaBeta(node, alpha, beta)); + node->UndoAction(actions[i]); + beta = std::min(val, beta); + if (val <= alpha) { + break; + } } - return -1; + return val; + } + return -1; }; +// Helper Functions// - -//Helper Functions// - - -//Credit to computationalcombinatorics.wordpress.com -//hideous code for generating the next colexicographical combination// +// Credit to computationalcombinatorics.wordpress.com +// hideous code for generating the next colexicographical combination// bool NextColex(std::vector& v, int k) { - int num = 0; - for (int i = 0; i < v.size(); ++i) { - if (i == v.size() - 1) { - v[i] = v[i] + 1; - if (v[i] > k - v.size() + i) { - return false; - } - num = i; - break; - } - else if (v[i + 1] - v[i] > 1 && v[i + 1] != i) { - v[i] = v[i] + 1; - if (v[i] > k - v.size() + i) { - return false; - } - num = i; - break; - } - } - for (int i = 0; i < num; ++i) { - v[i] = i; - } - return true; + int num = 0; + for (int i = 0; i < v.size(); ++i) { + if (i == v.size() - 1) { + v[i] = v[i] + 1; + if (v[i] > k - v.size() + i) { + return false; + } + num = i; + break; + } else if (v[i + 1] - v[i] > 1 && v[i + 1] != i) { + v[i] = v[i] + 1; + if (v[i] > k - v.size() + i) { + return false; + } + num = i; + break; + } + } + for (int i = 0; i < num; ++i) { + v[i] = i; + } + return true; } - - -char IncrementalAlphaBetaMemoryIso(Node* node, char alpha, char beta,int depth, vectorNa* TTable,std::unordered_map* SuitRanks, std::vector>& bin_coeffs) { - //fail soft ab search - char val = 0; - uint64_t key = 0; - bool player = node->Player(); - if (node->IsTerminal()) { - return node->Score(); - } - if (node->Moves() % 2 == 0&& depth==0) { - node->UpdateNodeKey(); - key = (player) ? node->AltKey() : node->GetNodeKey(); - uint32_t cards = key & bzhi_u64(~0, 32); - uint32_t colex = HalfColexer(cards, &bin_coeffs); - uint32_t suits = (key & (~0 ^ bzhi_u64(~0, 32))) >> 32; - uint32_t suit_rank = SuitRanks->at(suits); - char value = (player) ? node->RemainingTricks() - TTable->Get(colex,suit_rank) :TTable->Get(colex,suit_rank); - return value+node->Score(); - } - else if (node->Player() == 0) { - val = 0; - std::vector actions = node->LegalActions(); - for (int i = 0; i < actions.size(); ++i) { - node->ApplyAction(actions[i]); - val = std::max(val,IncrementalAlphaBetaMemoryIso(node, alpha, beta,depth-1, TTable,SuitRanks,bin_coeffs)); - node->UndoAction(actions[i]); - alpha = std::max(val, alpha); - if (val >= beta) { - break; - } - } - } - else if (node->Player() == 1) { - val =node->TotalTricks(); - std::vector actions = node->LegalActions(); - for (int i = 0; i < actions.size(); ++i) { - node->ApplyAction(actions[i]); - val = std::min(val, IncrementalAlphaBetaMemoryIso(node, alpha, beta,depth-1, TTable,SuitRanks,bin_coeffs)); - node->UndoAction(actions[i]); - beta = std::min(val, beta); - if (val <= alpha) { - break; - } - } - } - return val; +char IncrementalAlphaBetaMemoryIso( + Node* node, char alpha, char beta, int depth, vectorNa* TTable, + std::unordered_map* SuitRanks, + const std::vector>& bin_coeffs) { + // fail soft ab search + char val = 0; + uint64_t key = 0; + bool player = node->Player(); + if (node->IsTerminal()) { + return node->Score(); + } + if (node->Moves() % 2 == 0 && depth == 0) { + node->UpdateNodeKey(); + key = (player) ? node->AltKey() : node->GetNodeKey(); + uint32_t cards = key & bzhi_u64(~0, 32); + uint32_t colex = HalfColexer(cards, &bin_coeffs); + uint32_t suits = (key & (~0 ^ bzhi_u64(~0, 32))) >> 32; + uint32_t suit_rank = SuitRanks->at(suits); + char value = (player) + ? node->RemainingTricks() - TTable->Get(colex, suit_rank) + : TTable->Get(colex, suit_rank); + return value + node->Score(); + } else if (node->Player() == 0) { + val = 0; + std::vector actions = node->LegalActions(); + for (int i = 0; i < actions.size(); ++i) { + node->ApplyAction(actions[i]); + val = std::max( + val, IncrementalAlphaBetaMemoryIso(node, alpha, beta, depth - 1, + TTable, SuitRanks, bin_coeffs)); + node->UndoAction(actions[i]); + alpha = std::max(val, alpha); + if (val >= beta) { + break; + } + } + } else if (node->Player() == 1) { + val = node->TotalTricks(); + std::vector actions = node->LegalActions(); + for (int i = 0; i < actions.size(); ++i) { + node->ApplyAction(actions[i]); + val = std::min( + val, IncrementalAlphaBetaMemoryIso(node, alpha, beta, depth - 1, + TTable, SuitRanks, bin_coeffs)); + node->UndoAction(actions[i]); + beta = std::min(val, beta); + if (val <= alpha) { + break; + } + } + } + return val; }; -std::vector GWhistGenerator(int num,unsigned int seed){ - //generates pseudorandom endgames// - std::vector out; - out.reserve(num); - std::mt19937 g(seed); - std::array nums; - for (int i = 0; i < 2 * kNumRanks; ++i) { - nums[i] = i; - } - for (int i = 0; i < num; ++i) { - std::shuffle(nums.begin(), nums.end(), g); - uint32_t cards = 0; - std::array suits; - for (int j = 0; j < kNumRanks; ++j) { - cards = cards | (1 << nums[j]); - } - int sum = 0; - std::vector suit_lengths = {0,0,0,0}; - for(int j =0;j distrib(min,max); - suit_lengths[j] = distrib(g); - sum+= suit_lengths[j]; - } - suit_lengths[kNumSuits-1]=2*kNumRanks-sum; - sum =0; - for(int j =0;jkNumRanks){ - throw; - } - } - if(sum!= 2*kNumRanks){ - for(int j =0;j GWhistGenerator(int num, unsigned int seed) { + // generates pseudorandom endgames// + std::vector out; + out.reserve(num); + std::mt19937 g(seed); + std::array nums; + for (int i = 0; i < 2 * kNumRanks; ++i) { + nums[i] = i; + } + for (int i = 0; i < num; ++i) { + std::shuffle(nums.begin(), nums.end(), g); + uint32_t cards = 0; + std::array suits; + for (int j = 0; j < kNumRanks; ++j) { + cards = cards | (1 << nums[j]); + } + int sum = 0; + std::vector suit_lengths = {0, 0, 0, 0}; + for (int j = 0; j < kNumSuits - 1; ++j) { + int max = std::min(kNumRanks, 2 * kNumRanks - sum); + int min = std::max(0, (j - 1) * kNumRanks - sum); + std::uniform_int_distribution<> distrib(min, max); + suit_lengths[j] = distrib(g); + sum += suit_lengths[j]; + } + suit_lengths[kNumSuits - 1] = 2 * kNumRanks - sum; + sum = 0; + for (int j = 0; j < kNumSuits; ++j) { + sum += suit_lengths[j]; + if (suit_lengths[j] > kNumRanks) { + throw; + } + } + if (sum != 2 * kNumRanks) { + for (int j = 0; j < suit_lengths.size(); ++j) { + std::cout << suit_lengths[j] << " " << std::endl; + } + throw; + } + int cum_sum = 0; + for (int j = 0; j < kNumSuits; ++j) { + if (j == 0) { + suits[j] = bzhi_u32(~0, suit_lengths[j]); + } else { + suits[j] = + (bzhi_u32(~0, suit_lengths[j] + cum_sum)) ^ bzhi_u32(~0, cum_sum); + } + cum_sum += suit_lengths[j]; + } + out.push_back(Node(cards, suits, 0, false)); #ifdef DEBUG - std::cout << popcnt_u32(cards) << " " << popcnt_u32(suits[0]) + popcnt_u32(suits[1]) + popcnt_u32(suits[2]) + popcnt_u32(suits[3]) << std::endl; - std::cout << cards << " " << suits[0] << " " << suits[1] << " " << suits[2] << " " << suits[3] << std::endl; + std::cout << popcnt_u32(cards) << " " + << popcnt_u32(suits[0]) + popcnt_u32(suits[1]) + + popcnt_u32(suits[2]) + popcnt_u32(suits[3]) + << std::endl; + std::cout << cards << " " << suits[0] << " " << suits[1] << " " << suits[2] + << " " << suits[3] << std::endl; #endif - - } - return out; + } + return out; } - -void ThreadSolver(int size_endgames, vectorNa* outTTable, vectorNa* TTable, std::vector>& bin_coeffs, std::vector& suit_splits, std::unordered_map& SuitRanks, size_t start_id, size_t end_id) { - //takes endgames solved to depth d-1 and returns endgames solved to depth d // - std::vector combination; - combination.reserve(size_endgames); - for (int i = 0; i < size_endgames; ++i) { - combination.push_back(i); - } - bool control = true; - int count = 0; +void ThreadSolver(int size_endgames, vectorNa* outTTable, vectorNa* TTable, + const std::vector>& bin_coeffs, + const std::vector& suit_splits, + const std::unordered_map& SuitRanks, + size_t start_id, size_t end_id) { + // takes endgames solved to depth d-1 and returns endgames solved to depth d + // // + std::vector combination; + combination.reserve(size_endgames); + for (int i = 0; i < size_endgames; ++i) { + combination.push_back(i); + } + bool control = true; + int count = 0; + uint32_t cards = 0; + for (int i = 0; i < combination.size(); ++i) { + cards = cards | (1 << combination[i]); + } + while (count < start_id) { + NextColex(combination, 2 * size_endgames); + count++; + } + while (count < end_id && control) { uint32_t cards = 0; for (int i = 0; i < combination.size(); ++i) { - cards = cards | (1 << combination[i]); - } - while (count < start_id) { - NextColex(combination, 2 * size_endgames); - count++; - } - while (count < end_id && control) { - uint32_t cards = 0; - for (int i = 0; i < combination.size(); ++i) { - cards = cards | (1 << combination[i]); - } - for (int i = 0; i < suit_splits.size(); ++i) { - std::array suit_arr; - suit_arr[0] = bzhi_u32(~0, suit_splits[i] & 0b1111); - uint32_t sum = suit_splits[i] & 0b1111; - for (int j = 1; j < kNumSuits; ++j) { - uint32_t mask = bzhi_u32(~0, sum); - sum += (suit_splits[i] & (0b1111 << (4 * j))) >> 4 * j; - suit_arr[j] = bzhi_u32(~0, sum); - suit_arr[j] = suit_arr[j] ^ mask; - } - Node node(cards, suit_arr, 0, false); - char result = IncrementalAlphaBetaMemoryIso(&node,0,size_endgames,2,TTable,&SuitRanks,bin_coeffs); - outTTable->Set(count,i, result); - } - control = NextColex(combination, 2 * size_endgames); - count++; - } + cards = cards | (1 << combination[i]); + } + for (int i = 0; i < suit_splits.size(); ++i) { + std::array suit_arr; + suit_arr[0] = bzhi_u32(~0, suit_splits[i] & 0b1111); + uint32_t sum = suit_splits[i] & 0b1111; + for (int j = 1; j < kNumSuits; ++j) { + uint32_t mask = bzhi_u32(~0, sum); + sum += (suit_splits[i] & (0b1111 << (4 * j))) >> 4 * j; + suit_arr[j] = bzhi_u32(~0, sum); + suit_arr[j] = suit_arr[j] ^ mask; + } + Node node(cards, suit_arr, 0, false); + char result = IncrementalAlphaBetaMemoryIso( + &node, 0, size_endgames, 2, TTable, &SuitRanks, bin_coeffs); + outTTable->Set(count, i, result); + } + control = NextColex(combination, 2 * size_endgames); + count++; + } } -vectorNa RetroSolver(int size_endgames, vectorNa* TTable, std::vector>& bin_coeffs) { - //takes endgames solved to depth d-1 and returns endgames solved to depth d // - vectorNa outTTable = InitialiseTTable(size_endgames, bin_coeffs); - std::vector suit_splits = GenQuads(size_endgames); - std::unordered_map SuitRanks; - GenSuitRankingsRel(size_endgames - 1, &SuitRanks); - std::vector combination; - combination.reserve(size_endgames); - for (int i = 0; i < size_endgames; ++i) { - combination.push_back(i); - } - uint32_t v_length = (suit_splits.size() >> 1) + 1; - uint32_t min_block_size = 256; - uint32_t hard_threads = std::thread::hardware_concurrency(); - uint32_t num_threads = 1; - uint32_t num_outers =outTTable.GetOuterSize(); - //a haphazard attempt to mitigate false sharing// - for (uint32_t i = hard_threads; i >= 1; i--) { - if ((num_outers * v_length / i) >= min_block_size) { - num_threads = i; - break; - } - } - std::vector threads = {}; - for (int i = 0; i < num_threads; ++i) { - uint32_t block_size = num_outers / num_threads; - uint32_t start_id; - uint32_t end_id; - if (num_threads == 1) { - start_id = 0; - end_id = num_outers; - } - else if (i == num_threads - 1) { - start_id = block_size * (num_threads - 1); - end_id = num_outers; - } - else { - start_id = block_size * i; - end_id = block_size * (i + 1); - } - threads.push_back(std::thread(ThreadSolver, size_endgames, &outTTable, TTable,std::ref(bin_coeffs), std::ref(suit_splits), std::ref(SuitRanks), start_id, end_id)); - } - for (int i = 0; i >& bin_coeffs) { + // takes endgames solved to depth d-1 and returns endgames solved to depth d + // // + vectorNa outTTable = InitialiseTTable(size_endgames, bin_coeffs); + std::vector suit_splits = GenQuads(size_endgames); + std::unordered_map SuitRanks; + GenSuitRankingsRel(size_endgames - 1, &SuitRanks); + std::vector combination; + combination.reserve(size_endgames); + for (int i = 0; i < size_endgames; ++i) { + combination.push_back(i); + } + uint32_t v_length = (suit_splits.size() >> 1) + 1; + uint32_t min_block_size = 256; + uint32_t hard_threads = std::thread::hardware_concurrency(); + uint32_t num_threads = 1; + uint32_t num_outers = outTTable.GetOuterSize(); + // a haphazard attempt to mitigate false sharing// + for (uint32_t i = hard_threads; i >= 1; i--) { + if ((num_outers * v_length / i) >= min_block_size) { + num_threads = i; + break; + } + } + std::vector threads = {}; + for (int i = 0; i < num_threads; ++i) { + uint32_t block_size = num_outers / num_threads; + uint32_t start_id; + uint32_t end_id; + if (num_threads == 1) { + start_id = 0; + end_id = num_outers; + } else if (i == num_threads - 1) { + start_id = block_size * (num_threads - 1); + end_id = num_outers; + } else { + start_id = block_size * i; + end_id = block_size * (i + 1); + } + threads.push_back(std::thread( + ThreadSolver, size_endgames, &outTTable, TTable, std::ref(bin_coeffs), + std::ref(suit_splits), std::ref(SuitRanks), start_id, end_id)); + } + for (int i = 0; i < num_threads; ++i) { + threads[i].join(); + } + return outTTable; } - -bool TestRetroSolve(int samples, int depth, uint32_t seed, std::vector>& bin_coeffs) { - //Tests endgame solution with TTable vs raw seach - std::vector nodes = GWhistGenerator(samples, seed); - vectorNa v; - for (int i = 1; i <= depth; ++i) { - v = RetroSolver(i, &v, bin_coeffs); - } - std::unordered_map SuitRanks; - GenSuitRankingsRel(depth, &SuitRanks); - for (auto it = nodes.begin(); it != nodes.end(); ++it) { - char abm_unsafe = IncrementalAlphaBetaMemoryIso(&*it, 0,kNumRanks, 2 * (kNumRanks - depth), &v, &SuitRanks, bin_coeffs); - char abm_safe = AlphaBeta(&*it, 0, kNumRanks); - if (abm_unsafe != abm_safe) { - return false; - } - } - return true; +bool TestRetroSolve(int samples, int depth, uint32_t seed, + const std::vector>& bin_coeffs) { + // Tests endgame solution with TTable vs raw seach + std::vector nodes = GWhistGenerator(samples, seed); + vectorNa v; + for (int i = 1; i <= depth; ++i) { + v = RetroSolver(i, &v, bin_coeffs); + } + std::unordered_map SuitRanks; + GenSuitRankingsRel(depth, &SuitRanks); + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + char abm_unsafe = IncrementalAlphaBetaMemoryIso(&*it, 0, kNumRanks, + 2 * (kNumRanks - depth), &v, + &SuitRanks, bin_coeffs); + char abm_safe = AlphaBeta(&*it, 0, kNumRanks); + if (abm_unsafe != abm_safe) { + return false; + } + } + return true; } -vectorNa BuildTablebase(std::vector>& bin_coeffs) { - vectorNa v; - std::cout<<"Building Tablebase"<<"\n"; - for (int i = 1; i <= kNumRanks; ++i) { - v = RetroSolver(i, &v, bin_coeffs); - std::cout<<"Done "<>& bin_coeffs) { + vectorNa v; + std::cout << "Building Tablebase" + << "\n"; + for (int i = 1; i <= kNumRanks; ++i) { + v = RetroSolver(i, &v, bin_coeffs); + std::cout << "Done " << i << "\n"; + } + std::cout << "Built Tablebase" + << "\n"; + return v; } -bool TestTablebase(int samples,uint32_t seed,vectorNa& table_base,std::vector>& bin_coeffs){ - std::vector nodes = GWhistGenerator(samples, seed); - std::unordered_map SuitRanks; - GenSuitRankingsRel(kNumRanks, &SuitRanks); - for (auto it = nodes.begin(); it != nodes.end(); ++it) { - char abm_unsafe = IncrementalAlphaBetaMemoryIso(&*it, 0,kNumRanks, 0, &table_base, &SuitRanks, bin_coeffs); - char abm_safe = AlphaBeta(&*it, 0, kNumRanks); - if (abm_unsafe != abm_safe) { - return false; - } - } - return true; +bool TestTablebase(int samples, uint32_t seed, const vectorNa& table_base, + const std::vector>& bin_coeffs) { + std::vector nodes = GWhistGenerator(samples, seed); + std::unordered_map SuitRanks; + GenSuitRankingsRel(kNumRanks, &SuitRanks); + for (auto it = nodes.begin(); it != nodes.end(); ++it) { + char abm_unsafe = IncrementalAlphaBetaMemoryIso( + &*it, 0, kNumRanks, 0, &table_base, &SuitRanks, bin_coeffs); + char abm_safe = AlphaBeta(&*it, 0, kNumRanks); + if (abm_unsafe != abm_safe) { + return false; + } + } + return true; } -void StoreTTable(const std::string filename, const vectorNa& solution){ - //stores solution into a text file// - std::ofstream file(filename); - for(int i =0;i>& bin_coeffs){ - //Tests storage fidelity// - StoreTTable(filename,v); - vectorNa new_v = LoadTTable(filename,depth,bin_coeffs); - for(int i =0;i>& bin_coeffs) { + // Tests storage fidelity// + StoreTTable(filename, v); + vectorNa new_v = LoadTTable(filename, depth, bin_coeffs); + for (int i = 0; i < v.GetOuterSize(); ++i) { + for (int j = 0; j < v.GetInnerSize(); ++j) { + if (v.GetChar(i, j) != new_v.GetChar(i, j)) { + return false; + } + } + } + return true; } -}//germanwhist -}//open_spiel +} // namespace german_whist_foregame +} // namespace open_spiel -int main(){ - std::vector> bin_coeffs = open_spiel::german_whist_foregame::BinCoeffs(2*open_spiel::german_whist_foregame::kNumRanks); - open_spiel::german_whist_foregame::vectorNa tablebase = open_spiel::german_whist_foregame::BuildTablebase(bin_coeffs); - std::random_device rd; - int num_samples = 100; - if(open_spiel::german_whist_foregame::TestTablebase(num_samples,rd(),tablebase,bin_coeffs)){ - std::cout<<"Tablebase accurate"<> bin_coeffs = + open_spiel::german_whist_foregame::BinCoeffs( + 2 * open_spiel::german_whist_foregame::kNumRanks); + open_spiel::german_whist_foregame::vectorNa tablebase = + open_spiel::german_whist_foregame::BuildTablebase(bin_coeffs); + std::random_device rd; + int num_samples = 100; + if (open_spiel::german_whist_foregame::TestTablebase(num_samples, rd(), + tablebase, bin_coeffs)) { + std::cout << "Tablebase accurate" << std::endl; + } else { + std::cout << "Tablebase inaccurate" << std::endl; + } + std::cout << "Starting Saving Tablebase" << std::endl; + open_spiel::german_whist_foregame::StoreTTable("TTable13.txt", tablebase); + std::cout << "Finished Saving Tablebase" << std::endl; } - diff --git a/open_spiel/games/german_whist_foregame/german_whist_foregame.cc b/open_spiel/games/german_whist_foregame/german_whist_foregame.cc index 7d0200c6f8..97397a4dff 100644 --- a/open_spiel/games/german_whist_foregame/german_whist_foregame.cc +++ b/open_spiel/games/german_whist_foregame/german_whist_foregame.cc @@ -1,235 +1,225 @@ +#include "open_spiel/games/german_whist_foregame/german_whist_foregame.h" + #include + #include "open_spiel/abseil-cpp/absl/strings/str_cat.h" #include "open_spiel/game_parameters.h" #include "open_spiel/observer.h" #include "open_spiel/policy.h" #include "open_spiel/spiel.h" #include "open_spiel/spiel_utils.h" -#include "open_spiel/games/german_whist_foregame/german_whist_foregame.h" -// define BMI2 only if your system supports BMI2 intrinsics, modify compiler flags so that bmi2 instructions are compiled// -// #define __BMI2__ +// define BMI2 only if your system supports BMI2 intrinsics, modify compiler +// flags so that bmi2 instructions are compiled// #define __BMI2__ #ifdef __BMI2__ #include #endif namespace open_spiel { namespace german_whist_foregame { -// set this to the path you expect TTable to be once you have made it so recompilation is not necessary// -std::string kTTablePath=""; +// set this to the path you expect TTable to be once you have made it so +// recompilation is not necessary// +std::string kTTablePath = ""; -uint32_t tzcnt_u32(uint32_t a) { - return __builtin_ctz(a); -} -uint64_t tzcnt_u64(uint64_t a) { - return __builtin_ctzll(a); -} -uint32_t bzhi_u32(uint32_t a,uint32_t b) { - return a&((1u<>1; - m = m>>1; - }while(m!=0); - return r; + uint64_t r = 0; + uint64_t s = 0; + uint64_t b = 0; + do { + b = m & 1; + r = r | ((x & b) << s); + s = s + b; + x = x >> 1; + m = m >> 1; + } while (m != 0); + return r; #endif } -bool Triple::operator<(const Triple& triple)const { - return (length < triple.length)|| (length == triple.length && sig < triple.sig); +bool Triple::operator<(const Triple& triple) const { + return (length < triple.length) || + (length == triple.length && sig < triple.sig); } inline int CardRank(int card, int suit) { - uint64_t card_mask = ((uint64_t)1 << card); - card_mask = (card_mask >> (suit * kNumRanks)); - return tzcnt_u64(card_mask); + uint64_t card_mask = ((uint64_t)1 << card); + card_mask = (card_mask >> (suit * kNumRanks)); + return tzcnt_u64(card_mask); } inline int CardSuit(int card) { - uint64_t card_mask = ((uint64_t)1 << card); - for (int i = 0; i < kNumSuits; ++i) { - if (popcnt_u64(card_mask & kSuitMasks[i]) == 1) { - return i; - } + uint64_t card_mask = ((uint64_t)1 << card); + for (int i = 0; i < kNumSuits; ++i) { + if (popcnt_u64(card_mask & kSuitMasks[i]) == 1) { + return i; } - return kNumSuits; + } + return kNumSuits; } std::string CardString(int card) { - int suit = CardSuit(card); - return { kSuitChar[suit],kRankChar[CardRank(card,suit)] }; + int suit = CardSuit(card); + return {kSuitChar[suit], kRankChar[CardRank(card, suit)]}; } std::vector GenQuads(int size_endgames) { - // Generates Suit splittings for endgames of a certain size// - std::vector v; - for (char i = 0; i <= std::min(size_endgames * 2, kNumRanks); ++i) { - int sum = size_endgames * 2 - i; - for (char j = 0; j <= std::min(sum, kNumRanks); ++j) { - for (char k = std::max((int)j, sum - j - kNumRanks); k <= std::min(sum - j, kNumRanks); ++k) { - char l = sum - j - k; - if (l < k) { - break; - } - else { - uint32_t num = 0; - num = num | (i); - num = num | (j << 4); - num = num | (k << 8); - num = num | (l << 12); - v.push_back(num); - } - } + // Generates Suit splittings for endgames of a certain size// + std::vector v; + for (char i = 0; i <= std::min(size_endgames * 2, kNumRanks); ++i) { + int sum = size_endgames * 2 - i; + for (char j = 0; j <= std::min(sum, kNumRanks); ++j) { + for (char k = std::max((int)j, sum - j - kNumRanks); + k <= std::min(sum - j, kNumRanks); ++k) { + char l = sum - j - k; + if (l < k) { + break; + } else { + uint32_t num = 0; + num = num | (i); + num = num | (j << 4); + num = num | (k << 8); + num = num | (l << 12); + v.push_back(num); } + } } - return v; + } + return v; } std::vector> BinCoeffs(uint32_t max_n) { - //tabulates binomial coefficients// - std::vector> C(max_n+1,std::vector(max_n+1)); - for (uint32_t i = 1; i <= max_n; ++i) { - C[0][i] = 0; - } - for (uint32_t i = 0; i <= max_n; ++i) { - C[i][0] = 1; - } - for (uint32_t i = 1; i <= max_n; ++i) { - for (uint32_t j = 1; j <= max_n; ++j) { - C[i][j] = C[i - 1][j] + C[i - 1][j - 1]; - } - } - return C; -} -uint32_t HalfColexer(uint32_t cards,const std::vector>* bin_coeffs) { - //returns the colexicographical ranking of a combination of indices where the the size of the combination is half that of the set of indices// - uint32_t out = 0; - uint32_t count = 0; - while (cards != 0) { - uint32_t ind = tzcnt_u32(cards); - uint32_t val = bin_coeffs->at(ind)[count+1]; - out += val; - cards = blsr_u32(cards); - count++; - } - return out; -} -void GenSuitRankingsRel(uint32_t size, std::unordered_map* Ranks) { - //Generates ranking Table for suit splittings for endgames of a certain size// - std::vector v=GenQuads(size); - for (uint32_t i = 0; i < v.size(); ++i) { - Ranks->insert({ v[i],i }); - } + // tabulates binomial coefficients// + std::vector> C(max_n + 1, + std::vector(max_n + 1)); + for (uint32_t i = 1; i <= max_n; ++i) { + C[0][i] = 0; + } + for (uint32_t i = 0; i <= max_n; ++i) { + C[i][0] = 1; + } + for (uint32_t i = 1; i <= max_n; ++i) { + for (uint32_t j = 1; j <= max_n; ++j) { + C[i][j] = C[i - 1][j] + C[i - 1][j - 1]; + } + } + return C; +} +uint32_t HalfColexer(uint32_t cards, + const std::vector>* bin_coeffs) { + // returns the colexicographical ranking of a combination of indices where the + // the size of the combination is half that of the set of indices// + uint32_t out = 0; + uint32_t count = 0; + while (cards != 0) { + uint32_t ind = tzcnt_u32(cards); + uint32_t val = bin_coeffs->at(ind)[count + 1]; + out += val; + cards = blsr_u32(cards); + count++; + } + return out; +} +void GenSuitRankingsRel(uint32_t size, + std::unordered_map* Ranks) { + // Generates ranking Table for suit splittings for endgames of a certain + // size// + std::vector v = GenQuads(size); + for (uint32_t i = 0; i < v.size(); ++i) { + Ranks->insert({v[i], i}); + } } -vectorNa::vectorNa(size_t card_combs,size_t suit_splits,char val) { - data=std::vector(card_combs*((suit_splits>>1)+1),val); - inner_size =(suit_splits>>1)+1; - outer_size = card_combs; +vectorNa::vectorNa(size_t card_combs, size_t suit_splits, char val) { + data = std::vector(card_combs * ((suit_splits >> 1) + 1), val); + inner_size = (suit_splits >> 1) + 1; + outer_size = card_combs; } vectorNa::vectorNa() { - data={}; - inner_size=0; - outer_size=0; -} -size_t vectorNa::size() const { - return data.size(); -} -size_t vectorNa::GetInnerSize() const { - return inner_size; -} -size_t vectorNa::GetOuterSize() const { - return outer_size; -} -char const& vectorNa::operator[](size_t index) const { - return data[index]; -} -char vectorNa::GetChar(size_t i,size_t j) const { - return data[i*inner_size+j]; -} -void vectorNa::SetChar(size_t i,size_t j,char value){ - data[i*inner_size+j]=value; -} -char vectorNa::Get(size_t i,size_t j) const { - int remainder = j&0b1; - if(remainder==0){ - return 0b1111&data[i*inner_size+(j>>1)]; - } - else{ - return ((0b11110000&data[i*inner_size+(j>>1)])>>4); - } -} -void vectorNa::Set(size_t i,size_t j,char value) { - int remainder = j & 0b1; - if (remainder == 0) { - char datastore = 0b11110000 & data[i*inner_size+(j>>1)]; - data[i*inner_size+(j>>1)] = datastore|value; - } - else { - char datastore = (0b1111 & data[i*inner_size+(j>>1)]); - data[i*inner_size+(j>>1)] = datastore|(value << 4); - } -} -vectorNa InitialiseTTable(int size,std::vector>& bin_coeffs) { - //initialises TTable for a certain depth// - size_t suit_size = GenQuads(size).size(); - return vectorNa(bin_coeffs[2 * size][size],suit_size, 0); -} -vectorNa LoadTTable(const std::string filename, int depth,std::vector>& bin_coeffs) { - //loads solution from a text file into a vector for use// - std::cout<<"Loading Tablebase"<<"\n"; - vectorNa v = InitialiseTTable(depth,bin_coeffs); - std::ifstream file(filename,std::ios::binary); - if (!file.is_open()) { - std::cout<<"Failed to load Tablebase"<<"\n"; - std::cout<<"Tablebase will be set to all 0"<<"\n"; - file.close(); - return v; - } - else { - char c; - for (int i =0;i> 1)]; + } else { + return ((0b11110000 & data[i * inner_size + (j >> 1)]) >> 4); + } +} +void vectorNa::Set(size_t i, size_t j, char value) { + int remainder = j & 0b1; + if (remainder == 0) { + char datastore = 0b11110000 & data[i * inner_size + (j >> 1)]; + data[i * inner_size + (j >> 1)] = datastore | value; + } else { + char datastore = (0b1111 & data[i * inner_size + (j >> 1)]); + data[i * inner_size + (j >> 1)] = datastore | (value << 4); + } +} +vectorNa InitialiseTTable(int size, + const std::vector>& bin_coeffs) { + // initialises TTable for a certain depth// + size_t suit_size = GenQuads(size).size(); + return vectorNa(bin_coeffs[2 * size][size], suit_size, 0); +} +vectorNa LoadTTable(const std::string filename, int depth, + const std::vector>& bin_coeffs) { + // loads solution from a text file into a vector for use// + std::cout << "Loading Tablebase" + << "\n"; + vectorNa v = InitialiseTTable(depth, bin_coeffs); + std::ifstream file(filename, std::ios::binary); + if (!file.is_open()) { + std::cout << "Failed to load Tablebase" + << "\n"; + std::cout << "Tablebase will be set to all 0" + << "\n"; + file.close(); + return v; + } else { + char c; + for (int i = 0; i < v.GetOuterSize(); ++i) { + for (int j = 0; j < v.GetInnerSize(); ++j) { + file.get(c); + v.SetChar(i, j, c); + } + } + file.close(); + std::cout << "Tablebase Loaded" + << "\n"; + return v; + } } // Default parameters. -namespace {//namespace +namespace { // namespace // Facts about the game -const GameType kGameType{/*short_name=*/"german_whist_foregame", +const GameType kGameType{ + /*short_name=*/"german_whist_foregame", /*long_name=*/"german_whist_foregame", GameType::Dynamics::kSequential, GameType::ChanceMode::kExplicitStochastic, @@ -245,433 +235,461 @@ const GameType kGameType{/*short_name=*/"german_whist_foregame", }; std::shared_ptr Factory(const GameParameters& params) { - return std::shared_ptr(new GWhistFGame(params)); + return std::shared_ptr(new GWhistFGame(params)); } REGISTER_SPIEL_GAME(kGameType, Factory); -}//namespace +} // namespace -GWhistFGame::GWhistFGame(const GameParameters& params):Game(kGameType, params) { - bin_coeffs_=BinCoeffs(2*kNumRanks); - std::unordered_map temp; - GenSuitRankingsRel(13,&temp); - suit_ranks_=temp; - ttable_ = LoadTTable(kTTablePath,13,bin_coeffs_); +GWhistFGame::GWhistFGame(const GameParameters& params) + : Game(kGameType, params) { + bin_coeffs_ = BinCoeffs(2 * kNumRanks); + std::unordered_map temp; + GenSuitRankingsRel(13, &temp); + suit_ranks_ = temp; + ttable_ = LoadTTable(kTTablePath, 13, bin_coeffs_); }; std::unique_ptr GWhistFGame::NewInitialState() const { - const auto ptr=std::dynamic_pointer_cast(shared_from_this()); - return std::make_unique(ptr); + const auto ptr = + std::dynamic_pointer_cast(shared_from_this()); + return std::make_unique(ptr); } - -GWhistFState::GWhistFState(std::shared_ptr game):State(game) { - player_ = kChancePlayerId; - move_number_ = 0; - trump_ = -1; - deck_ = bzhi_u64(~0,kNumRanks*kNumSuits); - discard_ = 0; - hands_ = { 0,0 }; - history_.reserve(78); - ttable_ = &(game->ttable_); - suit_ranks_ =&(game->suit_ranks_); - bin_coeffs_=&(game->bin_coeffs_); +GWhistFState::GWhistFState(std::shared_ptr game) + : State(game) { + player_ = kChancePlayerId; + move_number_ = 0; + trump_ = -1; + deck_ = bzhi_u64(~0, kNumRanks * kNumSuits); + discard_ = 0; + hands_ = {0, 0}; + history_.reserve(78); + ttable_ = &(game->ttable_); + suit_ranks_ = &(game->suit_ranks_); + bin_coeffs_ = &(game->bin_coeffs_); } bool GWhistFState::Trick(int lead, int follow) const { - int lead_suit = CardSuit(lead); - int follow_suit = CardSuit(follow); - int lead_rank = CardRank(lead,lead_suit); - int follow_rank = CardRank(follow,follow_suit); - return (lead_suit == follow_suit && lead_rank < follow_rank) || (lead_suit != follow_suit && follow_suit != trump_); -} -bool GWhistFState::IsTerminal() const { - return(popcnt_u64(deck_) == 0); -} + int lead_suit = CardSuit(lead); + int follow_suit = CardSuit(follow); + int lead_rank = CardRank(lead, lead_suit); + int follow_rank = CardRank(follow, follow_suit); + return (lead_suit == follow_suit && lead_rank < follow_rank) || + (lead_suit != follow_suit && follow_suit != trump_); +} +bool GWhistFState::IsTerminal() const { return (popcnt_u64(deck_) == 0); } uint64_t GWhistFState::EndgameKey(int player_to_move) const { - //generates a 64 bit unsigned int where the first 32 are the suit ownerships from the perspective of the opponent using canonical rankings// - //example: if Spade suit is to_move = A3, opp =2, suit = 0b100 - //least significant part of first 32 bits is the trump suit, then the remaining suits ascending length order. - uint64_t cards_in_play = hands_[0]|hands_[1]; - std::vector suit_lengths = {}; - int opp = (player_to_move==0)?1:0; - //sort trump suits by length,then sig// - for (int i =0;i hand0; - std::array hand1; - hand0[0]=pext_u64(hands_[0],kSuitMasks[trump_]); - hand1[0]=pext_u64(hands_[1],kSuitMasks[trump_]); - for (int i =0;ihands_shuffled = {0,0}; - for (int i =0;i suit_lengths = {}; + int opp = (player_to_move == 0) ? 1 : 0; + // sort trump suits by length,then sig// + for (int i = 0; i < kNumSuits; ++i) { + if (i != trump_) { + uint64_t sig = + pext_u64(hands_[opp] & kSuitMasks[i], cards_in_play & kSuitMasks[i]); + suit_lengths.push_back( + Triple{i, popcnt_u64(kSuitMasks[i] & cards_in_play), sig}); + } + } + std::sort(suit_lengths.begin(), suit_lengths.end()); + std::array hand0; + std::array hand1; + hand0[0] = pext_u64(hands_[0], kSuitMasks[trump_]); + hand1[0] = pext_u64(hands_[1], kSuitMasks[trump_]); + for (int i = 0; i < kNumSuits - 1; ++i) { + hand0[i + 1] = pext_u64(hands_[0], kSuitMasks[suit_lengths[i].index]); + hand1[i + 1] = pext_u64(hands_[1], kSuitMasks[suit_lengths[i].index]); + } + std::array hands_shuffled = {0, 0}; + for (int i = 0; i < kNumSuits; ++i) { + hands_shuffled[0] = hands_shuffled[0] | (hand0[i] << (kNumRanks * i)); + hands_shuffled[1] = hands_shuffled[1] | (hand1[i] << (kNumRanks * i)); + } + uint64_t suit_sig = 0; + suit_sig = popcnt_u64(kSuitMasks[trump_] & cards_in_play); + for (int i = 0; i < kNumSuits - 1; ++i) { + suit_sig = suit_sig | ((uint64_t)suit_lengths[i].length << (4 * (i + 1))); + } + suit_sig = (suit_sig << 32); + cards_in_play = hands_shuffled[0] | hands_shuffled[1]; + uint64_t cards = pext_u64(hands_shuffled[opp], cards_in_play); + uint64_t key = cards | suit_sig; + return key; } std::vector GWhistFState::Returns() const { - if (IsTerminal()) { - std::vector out = {0,0}; - int lead_win = Trick(history_[move_number_ - 3].action, history_[move_number_ - 2].action); - int player_to_move=(lead_win)?history_[move_number_-3].player:history_[move_number_-2].player; - int opp = (player_to_move==0)?1:0; - uint64_t key = EndgameKey(player_to_move); - uint32_t cards = (key&bzhi_u64(~0,32)); - uint32_t colex = HalfColexer(cards,bin_coeffs_); - uint32_t suits = (key&(~0^bzhi_u64(~0,32)))>>32; - uint32_t suit_rank = suit_ranks_->at(suits); - char value =ttable_->Get(colex,suit_rank); - out[player_to_move] = 2*value-kNumRanks; - out[opp]=-out[player_to_move]; - return out; - } - else { - std::vector out = {0,0}; - return out; - } + if (IsTerminal()) { + std::vector out = {0, 0}; + int lead_win = Trick(history_[move_number_ - 3].action, + history_[move_number_ - 2].action); + int player_to_move = (lead_win) ? history_[move_number_ - 3].player + : history_[move_number_ - 2].player; + int opp = (player_to_move == 0) ? 1 : 0; + uint64_t key = EndgameKey(player_to_move); + uint32_t cards = (key & bzhi_u64(~0, 32)); + uint32_t colex = HalfColexer(cards, bin_coeffs_); + uint32_t suits = (key & (~0 ^ bzhi_u64(~0, 32))) >> 32; + uint32_t suit_rank = suit_ranks_->at(suits); + char value = ttable_->Get(colex, suit_rank); + out[player_to_move] = 2 * value - kNumRanks; + out[opp] = -out[player_to_move]; + return out; + } else { + std::vector out = {0, 0}; + return out; + } } - int GWhistFState::CurrentPlayer() const { return player_; } std::vector> GWhistFState::ChanceOutcomes() const { - std::vector> outcomes; - std::vector legal_actions = LegalActions(); - for (int i =0;i pair; - pair.first =legal_actions[i]; - pair.second = 1.0/legal_actions.size(); - outcomes.push_back(pair); - } - return outcomes; -} -std::string GWhistFState::ActionToString(Player player,Action move) const { - return CardString(move); + std::vector> outcomes; + std::vector legal_actions = LegalActions(); + for (int i = 0; i < legal_actions.size(); ++i) { + std::pair pair; + pair.first = legal_actions[i]; + pair.second = 1.0 / legal_actions.size(); + outcomes.push_back(pair); + } + return outcomes; +} +std::string GWhistFState::ActionToString(Player player, Action move) const { + return CardString(move); } std::string GWhistFState::ToString() const { - std::string out; - for (int i = 0; i < history_.size(); ++i) { - out += ActionToString(history_[i].player, history_[i].action); - out += "\n"; - } - return out; + std::string out; + for (int i = 0; i < history_.size(); ++i) { + out += ActionToString(history_[i].player, history_[i].action); + out += "\n"; + } + return out; } std::unique_ptr GWhistFState::Clone() const { - return std::unique_ptr(new GWhistFState(*this)); + return std::unique_ptr(new GWhistFState(*this)); } std::string GWhistFState::StateToString() const { - //doesnt use history in case of a resampled state with unreconciled history// - std::string out; - uint64_t copy_deck = deck_; - uint64_t copy_discard = discard_; - std::array copy_hands = hands_; - std::vector deck_cards; - std::vector player0_cards; - std::vector player1_cards; - std::vector discard; - while (copy_deck != 0) { - deck_cards.push_back(tzcnt_u64(copy_deck)); - copy_deck = blsr_u64(copy_deck); - } - while (copy_discard != 0) { - discard.push_back(tzcnt_u64(copy_discard)); - copy_discard = blsr_u64(copy_discard); - } + // doesnt use history in case of a resampled state with unreconciled history// + std::string out; + uint64_t copy_deck = deck_; + uint64_t copy_discard = discard_; + std::array copy_hands = hands_; + std::vector deck_cards; + std::vector player0_cards; + std::vector player1_cards; + std::vector discard; + while (copy_deck != 0) { + deck_cards.push_back(tzcnt_u64(copy_deck)); + copy_deck = blsr_u64(copy_deck); + } + while (copy_discard != 0) { + discard.push_back(tzcnt_u64(copy_discard)); + copy_discard = blsr_u64(copy_discard); + } - while (copy_hands[0] != 0) { - player0_cards.push_back(tzcnt_u64(copy_hands[0])); - copy_hands[0] = blsr_u64(copy_hands[0]); - } - while (copy_hands[1] != 0) { - player1_cards.push_back(tzcnt_u64(copy_hands[1])); - copy_hands[1] = blsr_u64(copy_hands[1]); - } - out += "Deck \n"; - for (int i = 0; i < deck_cards.size(); ++i) { - out += CardString(deck_cards[i]) + "\n"; - } - out += "Discard \n"; - for (int i = 0; i < discard.size(); ++i) { - out += CardString(discard[i]) + "\n"; - } + while (copy_hands[0] != 0) { + player0_cards.push_back(tzcnt_u64(copy_hands[0])); + copy_hands[0] = blsr_u64(copy_hands[0]); + } + while (copy_hands[1] != 0) { + player1_cards.push_back(tzcnt_u64(copy_hands[1])); + copy_hands[1] = blsr_u64(copy_hands[1]); + } + out += "Deck \n"; + for (int i = 0; i < deck_cards.size(); ++i) { + out += CardString(deck_cards[i]) + "\n"; + } + out += "Discard \n"; + for (int i = 0; i < discard.size(); ++i) { + out += CardString(discard[i]) + "\n"; + } - for (int i = 0; i < 2; ++i) { - out += "Player " + std::to_string(i) + "\n"; - std::vector var; - if (i == 0) { - var = player0_cards; - } - else { - var = player1_cards; - } - for (int j = 0; j < var.size(); ++j) { - out += CardString(var[j]) + "\n"; - } + for (int i = 0; i < 2; ++i) { + out += "Player " + std::to_string(i) + "\n"; + std::vector var; + if (i == 0) { + var = player0_cards; + } else { + var = player1_cards; } - return out; + for (int j = 0; j < var.size(); ++j) { + out += CardString(var[j]) + "\n"; + } + } + return out; } std::string GWhistFState::InformationStateString(Player player) const { - // THIS IS WHAT A PLAYER IS SHOWN WHEN PLAYING// - SPIEL_CHECK_TRUE(player >= 0 && player < 2); - std::string p = std::to_string(player)+","; - std::string cur_hand = ""; - std::string observations=""; - std::vector v_hand = {}; - uint64_t p_hand = hands_[player]; - while (p_hand!=0) { - v_hand.push_back(tzcnt_u64(p_hand)); - p_hand = blsr_u64(p_hand); - } - std::sort(v_hand.begin(),v_hand.end()); - for (int i =0;i GWhistFState::ResampleFromInfostate(int player_id,std::function rng) const { - //only valid when called from a position where a player can act// - auto resampled_state = std::unique_ptr(new GWhistFState(*this)); - //seeding mt19937// - std::random_device rd; - std::mt19937 gen(rd()); - uint64_t necessary_cards = 0; - for (int i = 2 * kNumRanks; i < history_.size(); i+=4) { - //face up cards from deck// - necessary_cards = (necessary_cards | (uint64_t(1) << history_[i].action)); - } - int move_index = move_number_ - ((kNumRanks * kNumSuits) / 2); - int move_remainder = move_index % 4; - int opp = (player_id == 0) ? 1 : 0; - int recent_faceup = move_number_ - move_remainder; - uint64_t recent_faceup_card = (uint64_t(1) << history_[recent_faceup].action); - // if a face up card from the deck is not in players hand or discard it must be in opps unless it is the most recent face up// - necessary_cards = (necessary_cards & (~(hands_[player_id] | discard_|recent_faceup_card))); - //sufficient cards are all cards not in players hand,the discard, or the recent face up// - uint64_t sufficient_cards = (bzhi_u64(~0, kNumRanks * kNumSuits) ^(hands_[player_id] | discard_|recent_faceup_card)); - //sufficient_cards are not necessary // - sufficient_cards = (sufficient_cards & (~(necessary_cards))); - //we must now take into account the observation of voids// - std::array when_voided = {0,0,0,0}; - std::array voids = {-1,-1,-1,-1}; - std::vector opp_dealt_hidden; - for (int i = 2 * kNumRanks; i < history_.size(); ++i) { - if (history_[i - 1].player == player_id && history_[i].player == (opp) && CardSuit(history_[i-1].action)!=CardSuit(history_[i].action)) { - when_voided[CardSuit(history_[i - 1].action)] = i - 1; - } - if (history_[i - 1].player == player_id && history_[i].player == (opp) && Trick(history_[i - 1].action, history_[i].action)) { - opp_dealt_hidden.push_back(i - 1); - } - if (history_[i - 1].player == (opp) && history_[i].player == (player_id) && !Trick(history_[i - 1].action, history_[i].action)) { - opp_dealt_hidden.push_back(i - 1); - } - } - //now voids contains the number of hidden cards dealt to opp since it showed a void in that suit, i.e the maximum number of cards held in that suit// - //if the suit is unvoided, then this number is -1// - for (int i = 0; i < kNumSuits; ++i) { - if (when_voided[i] != 0) { - voids[i] = 0; - for (int j = 0; j < opp_dealt_hidden.size(); ++j) { - if (opp_dealt_hidden[j] >= when_voided[i]) { - voids[i] += 1; - } - } - } + // THIS IS WHAT A PLAYER IS SHOWN WHEN PLAYING// + SPIEL_CHECK_TRUE(player >= 0 && player < 2); + std::string p = std::to_string(player) + ","; + std::string cur_hand = ""; + std::string observations = ""; + std::vector v_hand = {}; + uint64_t p_hand = hands_[player]; + while (p_hand != 0) { + v_hand.push_back(tzcnt_u64(p_hand)); + p_hand = blsr_u64(p_hand); + } + std::sort(v_hand.begin(), v_hand.end()); + for (int i = 0; i < v_hand.size(); ++i) { + cur_hand = cur_hand + CardString(v_hand[i]); + cur_hand = cur_hand + ","; + } + cur_hand += "\n"; + for (int i = 2 * kNumRanks; i < history_.size(); ++i) { + int index = (i - 2 * kNumRanks) % 4; + switch (index) { + case 0: + observations = + observations + "c_public:" + CardString(history_[i].action) + ","; + break; + case 1: + observations = observations + "p" + std::to_string(history_[i].player) + + ":" + CardString(history_[i].action) + ","; + break; + case 2: + observations = observations + "p" + std::to_string(history_[i].player) + + ":" + CardString(history_[i].action) + ","; + break; + case 3: + int lead_win = Trick(history_[i - 2].action, history_[i - 1].action); + int loser = ((lead_win) ^ (history_[i - 2].player == 0)) ? 0 : 1; + if (loser == player) { + observations = observations + + "c_observed:" + CardString(history_[i].action) + "\n"; + } else { + observations = observations + "c_unobserved:" + "\n"; } - //we now perform a sequence of shuffles to generate a possible opponent hand, and make no attempt to reconcile the history with this new deal// - int nec = popcnt_u64(necessary_cards); - for (int i = 0; i < kNumSuits; ++i) { - if (voids[i] != -1&&popcnt_u64(sufficient_cards&kSuitMasks[i])>voids[i]) { - uint64_t suit_subset = (sufficient_cards & kSuitMasks[i]); - std::vector temp; - while (suit_subset != 0) { - temp.push_back(tzcnt_u64(suit_subset)); - suit_subset = blsr_u64(suit_subset); - } - std::shuffle(temp.begin(), temp.end(), gen); - sufficient_cards = (sufficient_cards &~(kSuitMasks[i])); - for (int j = 0; j < voids[i]; ++j) { - sufficient_cards = (sufficient_cards | (uint64_t(1) << temp[j])); - } - } + break; + } + } + return p + cur_hand + observations; +} +std::unique_ptr GWhistFState::ResampleFromInfostate( + int player_id, std::function rng) const { + // only valid when called from a position where a player can act// + auto resampled_state = std::unique_ptr(new GWhistFState(*this)); + // seeding mt19937// + std::random_device rd; + std::mt19937 gen(rd()); + uint64_t necessary_cards = 0; + for (int i = 2 * kNumRanks; i < history_.size(); i += 4) { + // face up cards from deck// + necessary_cards = (necessary_cards | (uint64_t(1) << history_[i].action)); + } + int move_index = move_number_ - ((kNumRanks * kNumSuits) / 2); + int move_remainder = move_index % 4; + int opp = (player_id == 0) ? 1 : 0; + int recent_faceup = move_number_ - move_remainder; + uint64_t recent_faceup_card = (uint64_t(1) << history_[recent_faceup].action); + // if a face up card from the deck is not in players hand or discard it must + // be in opps unless it is the most recent face up// + necessary_cards = (necessary_cards & + (~(hands_[player_id] | discard_ | recent_faceup_card))); + // sufficient cards are all cards not in players hand,the discard, or the + // recent face up// + uint64_t sufficient_cards = + (bzhi_u64(~0, kNumRanks * kNumSuits) ^ + (hands_[player_id] | discard_ | recent_faceup_card)); + // sufficient_cards are not necessary // + sufficient_cards = (sufficient_cards & (~(necessary_cards))); + // we must now take into account the observation of voids// + std::array when_voided = {0, 0, 0, 0}; + std::array voids = {-1, -1, -1, -1}; + std::vector opp_dealt_hidden; + for (int i = 2 * kNumRanks; i < history_.size(); ++i) { + if (history_[i - 1].player == player_id && history_[i].player == (opp) && + CardSuit(history_[i - 1].action) != CardSuit(history_[i].action)) { + when_voided[CardSuit(history_[i - 1].action)] = i - 1; + } + if (history_[i - 1].player == player_id && history_[i].player == (opp) && + Trick(history_[i - 1].action, history_[i].action)) { + opp_dealt_hidden.push_back(i - 1); + } + if (history_[i - 1].player == (opp) && history_[i].player == (player_id) && + !Trick(history_[i - 1].action, history_[i].action)) { + opp_dealt_hidden.push_back(i - 1); + } + } + // now voids contains the number of hidden cards dealt to opp since it showed + // a void in that suit, i.e the maximum number of cards held in that suit// if + // the suit is unvoided, then this number is -1// + for (int i = 0; i < kNumSuits; ++i) { + if (when_voided[i] != 0) { + voids[i] = 0; + for (int j = 0; j < opp_dealt_hidden.size(); ++j) { + if (opp_dealt_hidden[j] >= when_voided[i]) { + voids[i] += 1; } - //finally generating a possible hand for opponent// - std::vector hand_vec; - while (sufficient_cards != 0) { - hand_vec.push_back(tzcnt_u64(sufficient_cards)); - sufficient_cards = blsr_u64(sufficient_cards); - } - std::shuffle(hand_vec.begin(), hand_vec.end(), gen); - uint64_t suff_hand = 0; - uint64_t opp_hand=0; - for (int i = 0; i < popcnt_u64(hands_[opp])-nec; ++i) { - suff_hand = suff_hand | (uint64_t(1) << hand_vec[i]); - } - opp_hand = suff_hand | necessary_cards; - resampled_state->hands_[opp] = opp_hand; - resampled_state->deck_ = bzhi_u64(~0, kNumRanks * kNumSuits) ^ (discard_ | opp_hand | hands_[player_id]|recent_faceup_card); - return resampled_state; - } + } + } + } + // we now perform a sequence of shuffles to generate a possible opponent hand, + // and make no attempt to reconcile the history with this new deal// + int nec = popcnt_u64(necessary_cards); + for (int i = 0; i < kNumSuits; ++i) { + if (voids[i] != -1 && + popcnt_u64(sufficient_cards & kSuitMasks[i]) > voids[i]) { + uint64_t suit_subset = (sufficient_cards & kSuitMasks[i]); + std::vector temp; + while (suit_subset != 0) { + temp.push_back(tzcnt_u64(suit_subset)); + suit_subset = blsr_u64(suit_subset); + } + std::shuffle(temp.begin(), temp.end(), gen); + sufficient_cards = (sufficient_cards & ~(kSuitMasks[i])); + for (int j = 0; j < voids[i]; ++j) { + sufficient_cards = (sufficient_cards | (uint64_t(1) << temp[j])); + } + } + } + // finally generating a possible hand for opponent// + std::vector hand_vec; + while (sufficient_cards != 0) { + hand_vec.push_back(tzcnt_u64(sufficient_cards)); + sufficient_cards = blsr_u64(sufficient_cards); + } + std::shuffle(hand_vec.begin(), hand_vec.end(), gen); + uint64_t suff_hand = 0; + uint64_t opp_hand = 0; + for (int i = 0; i < popcnt_u64(hands_[opp]) - nec; ++i) { + suff_hand = suff_hand | (uint64_t(1) << hand_vec[i]); + } + opp_hand = suff_hand | necessary_cards; + resampled_state->hands_[opp] = opp_hand; + resampled_state->deck_ = + bzhi_u64(~0, kNumRanks * kNumSuits) ^ + (discard_ | opp_hand | hands_[player_id] | recent_faceup_card); + return resampled_state; +} std::string GWhistFState::ObservationString(Player player) const { - //note this is a lie, this is not the observation state string but it is used for ISMCTS to label nodes// - SPIEL_CHECK_TRUE(player >= 0 && player < 2); - std::string p = "p"+std::to_string(player)+","; - std::string cur_hand=""; - std::string public_info = ""; - uint64_t p_hand = hands_[player]; - std::vector v_hand = {}; - while (p_hand!=0) { - v_hand.push_back(tzcnt_u64(p_hand)); - p_hand = blsr_u64(p_hand); - } - std::sort(v_hand.begin(),v_hand.end()); - for (int i =0;i= 0 && player < 2); + std::string p = "p" + std::to_string(player) + ","; + std::string cur_hand = ""; + std::string public_info = ""; + uint64_t p_hand = hands_[player]; + std::vector v_hand = {}; + while (p_hand != 0) { + v_hand.push_back(tzcnt_u64(p_hand)); + p_hand = blsr_u64(p_hand); + } + std::sort(v_hand.begin(), v_hand.end()); + for (int i = 0; i < v_hand.size(); ++i) { + cur_hand = cur_hand + CardString(v_hand[i]) + ","; + } + for (int i = 2 * kNumRanks; i < history_.size(); ++i) { + int index = (i - 2 * kNumRanks) % 4; + if (index != 3) { + public_info = public_info + std::to_string(history_[i].player) + ":" + + CardString(history_[i].action) + ","; + } + } + return p + cur_hand + public_info; } -std::vector GWhistFState::LegalActions() const{ - std::vector actions; - if (IsTerminal()) return {}; - if (IsChanceNode()) { - actions.reserve(popcnt_u64(deck_)); - uint64_t copy_deck = deck_; - while (copy_deck != 0) { - actions.push_back(tzcnt_u64(copy_deck)); - copy_deck = blsr_u64(copy_deck); - } +std::vector GWhistFState::LegalActions() const { + std::vector actions; + if (IsTerminal()) return {}; + if (IsChanceNode()) { + actions.reserve(popcnt_u64(deck_)); + uint64_t copy_deck = deck_; + while (copy_deck != 0) { + actions.push_back(tzcnt_u64(copy_deck)); + copy_deck = blsr_u64(copy_deck); + } + } else { + // lead// + actions.reserve(kNumRanks); + if (history_.back().player == kChancePlayerId) { + uint64_t copy_hand = hands_[player_]; + while (copy_hand != 0) { + actions.push_back(tzcnt_u64(copy_hand)); + copy_hand = blsr_u64(copy_hand); + } } - else { - //lead// - actions.reserve(kNumRanks); - if (history_.back().player == kChancePlayerId) { - uint64_t copy_hand = hands_[player_]; - while (copy_hand != 0) { - actions.push_back(tzcnt_u64(copy_hand)); - copy_hand = blsr_u64(copy_hand); - } - } - //follow// - else { - uint64_t copy_hand = hands_[player_] & kSuitMasks[CardSuit(history_.back().action)]; - if (copy_hand == 0) { - copy_hand = hands_[player_]; - } - while (copy_hand != 0) { - actions.push_back(tzcnt_u64(copy_hand)); - copy_hand = blsr_u64(copy_hand); - } - } - } - return actions; + // follow// + else { + uint64_t copy_hand = + hands_[player_] & kSuitMasks[CardSuit(history_.back().action)]; + if (copy_hand == 0) { + copy_hand = hands_[player_]; + } + while (copy_hand != 0) { + actions.push_back(tzcnt_u64(copy_hand)); + copy_hand = blsr_u64(copy_hand); + } + } + } + return actions; } void GWhistFState::DoApplyAction(Action move) { - //initial deal// - int player_start = player_; - if (move_number_ < (kNumSuits * kNumRanks) / 2) { - hands_[move_number_ % 2] = (hands_[move_number_ % 2] |((uint64_t)1 << move)); + // initial deal// + int player_start = player_; + if (move_number_ < (kNumSuits * kNumRanks) / 2) { + hands_[move_number_ % 2] = + (hands_[move_number_ % 2] | ((uint64_t)1 << move)); + deck_ = (deck_ ^ ((uint64_t)1 << move)); + } else if (move_number_ == (kNumSuits * kNumRanks / 2)) { + trump_ = CardSuit(move); + deck_ = (deck_ ^ ((uint64_t)1 << move)); + player_ = 0; + } + // cardplay// + else if (move_number_ > (kNumSuits * kNumRanks) / 2) { + int move_index = (move_number_ - ((kNumSuits * kNumRanks) / 2)) % 4; + switch (move_index) { + bool lead_win; + int winner; + int loser; + case 0: + // revealing face up card// deck_ = (deck_ ^ ((uint64_t)1 << move)); - } - else if (move_number_ == (kNumSuits * kNumRanks / 2)) { - trump_ = CardSuit(move); + lead_win = Trick(history_[move_number_ - 3].action, + history_[move_number_ - 2].action); + winner = + ((lead_win) ^ (history_[move_number_ - 3].player == 0)) ? 1 : 0; + player_ = winner; + break; + case 1: + // establishing lead// + discard_ = (discard_ | ((uint64_t)1 << move)); + hands_[player_] = (hands_[player_] ^ ((uint64_t)1 << move)); + (player_ == 0) ? player_ = 1 : player_ = 0; + break; + case 2: + // following and awarding face up// + discard_ = (discard_ | ((uint64_t)1 << move)); + hands_[player_] = (hands_[player_] ^ ((uint64_t)1 << move)); + lead_win = Trick(history_[move_number_ - 1].action, move); + winner = + ((lead_win) ^ (history_[move_number_ - 1].player == 0)) ? 1 : 0; + hands_[winner] = (hands_[winner] | + ((uint64_t)1 << history_[move_number_ - 2].action)); + player_ = kChancePlayerId; + break; + case 3: + // awarding face down// deck_ = (deck_ ^ ((uint64_t)1 << move)); - player_ = 0; - } - //cardplay// - else if (move_number_ > (kNumSuits * kNumRanks) / 2) { - int move_index = (move_number_ - ((kNumSuits * kNumRanks) / 2)) % 4; - switch (move_index) { - bool lead_win; - int winner; - int loser; - case 0: - //revealing face up card// - deck_ = (deck_ ^ ((uint64_t)1 << move)); - lead_win = Trick(history_[move_number_ - 3].action, history_[move_number_ - 2].action); - winner = ((lead_win) ^ (history_[move_number_ - 3].player == 0)) ? 1 : 0; - player_ = winner; - break; - case 1: - //establishing lead// - discard_ = (discard_|((uint64_t)1<>& bin_coeffs); -vectorNa LoadTTable(const std::string filename,int depth,std::vector>& bin_coeffs); +vectorNa InitialiseTTable(int size,const std::vector>& bin_coeffs); +vectorNa LoadTTable(const std::string filename,int depth,const std::vector>& bin_coeffs); class GWhistFGame : public Game { public: explicit GWhistFGame(const GameParameters& params);