From db6878ec4067af13fa0382a4352782c345469383 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Rapin?= Date: Mon, 30 Dec 2019 13:36:04 +0100 Subject: [PATCH] Clean up some code in games and powersystem (#420) --- nevergrad/functions/games/game.py | 265 +++++++++++------------ nevergrad/functions/games/test_game.py | 13 +- nevergrad/functions/powersystems/core.py | 102 +++++---- 3 files changed, 196 insertions(+), 184 deletions(-) diff --git a/nevergrad/functions/games/game.py b/nevergrad/functions/games/game.py index c4307e76e..0c0a290b4 100644 --- a/nevergrad/functions/games/game.py +++ b/nevergrad/functions/games/game.py @@ -6,9 +6,9 @@ # Discussions with Tristan Cazenave, Bruno Bouzy, have been helpful. # Dagstuhl's 2019 seminar on games has been helpful (seminar 19511). +import functools import numpy as np from nevergrad import instrumentation as inst -from nevergrad.instrumentation.multivariables import Instrumentation class _Game: @@ -18,11 +18,11 @@ def __init__(self): self.history2 = [] self.batawaf = False self.converter = { - "flip": lambda p1, p2: self.flip_play_game(p1, p2), - "batawaf": lambda p1, p2: self.war_play_game(p1, p2, batawaf=True), - "war": lambda p1, p2: self.war_play_game(p1, p2), - "guesswho": lambda p1, p2: self.guesswho_play_game(p1, p2), - "bigguesswho": lambda p1, p2: self.guesswho_play_game(p1, p2, init=96), + "flip": self.flip_play_game, + "batawaf": functools.partial(self.war_play_game, batawaf=True), + "war": self.war_play_game, + "guesswho": self.guesswho_play_game, + "bigguesswho": functools.partial(self.guesswho_play_game, init=96), } def get_list_of_games(self): @@ -31,11 +31,10 @@ def get_list_of_games(self): # If both policies are None, then we return the length of a policy (which is a list of float). # Otherwise we return 1 if policy1 wins, 2 if policy2 wins, and 0 in case of draw. 
def play_game(self, game, policy1=None, policy2=None): - # pylint: disable=too-many-return-statements self.history1 = [] self.history2 = [] if game not in self.converter.keys(): - raise NotImplementedError(game) + raise NotImplementedError(f"{game} is not implemented, choose among: {list(self.converter.keys())}") return self.converter[game](policy1, policy2) def guesswho_play_noturn(self, decks, policy): @@ -54,7 +53,7 @@ def guesswho_play_noturn(self, decks, policy): + policy[2] * late * difference / (1 + decks[0]) + policy[3] * late * difference / (1 + decks[1]) ) - except: + except Exception: # pylint: disable=broad-except return baseline def guesswho_play(self, policy, decks, turn): @@ -170,11 +169,11 @@ def flip_play_game_nosym(self, policy1, policy2): del cards1[0] del cards2[0] something_moves = True -# print "==========" -# print visible1 + [nan] + [len(visible1) + len(cards1)] -# print stack -# print visible2 + [nan] + [len(visible2) + len(cards2)] -# print "==========" + # print "==========" + # print visible1 + [nan] + [len(visible1) + len(cards1)] + # print stack + # print visible2 + [nan] + [len(visible2) + len(cards2)] + # print "==========" return 1 if len(visible1) < len(visible2) else 2 if len(visible2) < len(visible1) else 0 def flip_value(self, visible1, visible2, l1, l2, stack, policy1): @@ -195,116 +194,117 @@ def flip_value(self, visible1, visible2, l1, l2, stack, policy1): value += policy1[i * 57 + j] * state[i] * state[j] return value - def phantomgo_choose(self, policy, history): - if policy is not None and history < len(policy): - state = np.random.RandomState(hash(policy[history])) - result = state.randint(board.NN) - return result - else: - result = np.random.randint(board.NN) - return result - - def phantomgo_play_game(self, policy1, policy2, size=7): - if policy1 is None and policy2 is None: - return 20000 - if np.random.uniform(0., 1.) 
< .5: - result = self.internal_phantomgo_play_game(policy2, policy1, size) - return 1 if result == 2 else 2 if result == 1 else 0 - - return self.internal_phantomgo_play_game(policy1, policy2, size) - - def internal_phantomgo_play_game(self, policy1, policy2, size): - # pylint: disable=too-many-locals - # Empty board. - if size == 7: - EMPTY_BOARD = EMPTY_BOARD7 - Position = Position7 - if size == 9: - EMPTY_BOARD = EMPTY_BOARD9 - Position = Position9 - if size == 19: - EMPTY_BOARD = EMPTY_BOARD19 - Position = Position19 - - p = Position(board=EMPTY_BOARD, ko=None) - - mixing = 3 # We mix 3 policies. Please note that we also apply a 8-fold random rotation/symmetry. - if policy1 is not None: - init = np.random.randint(mixing) - policy1 = [policy1[i] for i in range(init, len(policy1), mixing)] - if policy2 is not None: - init = np.random.randint(mixing) - policy2 = [policy2[i] for i in range(init, len(policy2), mixing)] - - history1 = 1 # Player 1 starts. - history2 = 2 - - random_rot = np.random.randint(8) - - def transformation(x): - a = x // size - b = x % size - if random_rot == 0: - a = size - 1 - a - b = size - 1 - b - elif random_rot == 1: - b = size - 1 - b - elif random_rot == 2: - a = size - 1 - a - b = size - 1 - b - elif random_rot == 3: - b = size - 1 - b - elif random_rot == 4: - c = b - b = a - a = c - a = size - 1 - a - elif random_rot == 5: - c = b - b = a - a = c - elif random_rot == 6: - c = b - b = a - a = c - a = size - 1 - a - elif random_rot == 7: - c = b - b = a - a = c - return a * size + b - - # pylint: disable=broad-except - for _ in range(2 * size * size - size - 1): - # print("move " + str(idx)) - # print("=================") - # print(str(p)) - # print("=================") - for _ in range((size * size - 9) // 2): - try: - move1 = transformation(self.phantomgo_choose(policy1, history1)) - # print("player1 trying ", move1) - p = p.play_move(move1, board.BLACK) - history1 = 2 * history1 + 1 # legal move. 
- break - except Exception: - # print("failed!" + str(e)) - history1 = 2 * history1 + 2 # illegal move. - # print("=================") - # print(p) - # print("=================") - for _ in range((size * size - 9) // 2): - try: - move2 = self.phantomgo_choose(policy2, history2) - # print("player 2 trying ", move2) - p = p.play_move(move2, board.WHITE) - history2 = 2 * history2 + 1 # legal move. - break - except Exception: - # print("failed!" + str(e)) - history2 = 2 * history2 + 2 # illegal move. - - return 1 if p.score() > 0 else 2 + # TODO remove if not planned to be used + # def phantomgo_choose(self, policy, history): + # if policy is not None and history < len(policy): + # state = np.random.RandomState(hash(policy[history])) + # result = state.randint(board.NN) + # return result + # else: + # result = np.random.randint(board.NN) + # return result + + # def phantomgo_play_game(self, policy1, policy2, size=7): + # if policy1 is None and policy2 is None: + # return 20000 + # if np.random.uniform(0., 1.) < .5: + # result = self.internal_phantomgo_play_game(policy2, policy1, size) + # return 1 if result == 2 else 2 if result == 1 else 0 + + # return self.internal_phantomgo_play_game(policy1, policy2, size) + + # def internal_phantomgo_play_game(self, policy1, policy2, size): + # # pylint: disable=too-many-locals + # # Empty board. + # if size == 7: + # EMPTY_BOARD = EMPTY_BOARD7 + # Position = Position7 + # if size == 9: + # EMPTY_BOARD = EMPTY_BOARD9 + # Position = Position9 + # if size == 19: + # EMPTY_BOARD = EMPTY_BOARD19 + # Position = Position19 + + # p = Position(board=EMPTY_BOARD, ko=None) + + # mixing = 3 # We mix 3 policies. Please note that we also apply a 8-fold random rotation/symmetry. 
+ # if policy1 is not None: + # init = np.random.randint(mixing) + # policy1 = [policy1[i] for i in range(init, len(policy1), mixing)] + # if policy2 is not None: + # init = np.random.randint(mixing) + # policy2 = [policy2[i] for i in range(init, len(policy2), mixing)] + + # history1 = 1 # Player 1 starts. + # history2 = 2 + + # random_rot = np.random.randint(8) + + # def transformation(x): + # a = x // size + # b = x % size + # if random_rot == 0: + # a = size - 1 - a + # b = size - 1 - b + # elif random_rot == 1: + # b = size - 1 - b + # elif random_rot == 2: + # a = size - 1 - a + # b = size - 1 - b + # elif random_rot == 3: + # b = size - 1 - b + # elif random_rot == 4: + # c = b + # b = a + # a = c + # a = size - 1 - a + # elif random_rot == 5: + # c = b + # b = a + # a = c + # elif random_rot == 6: + # c = b + # b = a + # a = c + # a = size - 1 - a + # elif random_rot == 7: + # c = b + # b = a + # a = c + # return a * size + b + + # # pylint: disable=broad-except + # for _ in range(2 * size * size - size - 1): + # # print("move " + str(idx)) + # # print("=================") + # # print(str(p)) + # # print("=================") + # for _ in range((size * size - 9) // 2): + # try: + # move1 = transformation(self.phantomgo_choose(policy1, history1)) + # # print("player1 trying ", move1) + # p = p.play_move(move1, board.BLACK) + # history1 = 2 * history1 + 1 # legal move. + # break + # except Exception: + # # print("failed!" + str(e)) + # history1 = 2 * history1 + 2 # illegal move. + # # print("=================") + # # print(p) + # # print("=================") + # for _ in range((size * size - 9) // 2): + # try: + # move2 = self.phantomgo_choose(policy2, history2) + # # print("player 2 trying ", move2) + # p = p.play_move(move2, board.WHITE) + # history2 = 2 * history2 + 1 # legal move. + # break + # except Exception: + # # print("failed!" + str(e)) + # history2 = 2 * history2 + 2 # illegal move. 
+ + # return 1 if p.score() > 0 else 2 def war_play_game(self, policy1, policy2, batawaf=False): # pylint: disable=too-many-return-statements @@ -380,8 +380,7 @@ def war_decide(self, policy, num_cards, list_of_cards): # Real life is more complicated! This is a very simple model. -# pylint: disable=too-many-instance-attributes,too-many-arguments,too-many-statements,too-many-locals -class Game(inst.InstrumentedFunction): +class Game(inst.InstrumentedFunction): # TODO: Improve seeding support (with ParametrizedFunction) """ Parameters ---------- @@ -393,9 +392,8 @@ class Game(inst.InstrumentedFunction): def __init__(self, game: str = "war") -> None: self.game = game self.game_object = _Game() - the_dimension = self.game_object.play_game(self.game) * 2 # times 2 because we consider both players separately. - instrumentation = Instrumentation(inst.var.Array(the_dimension)) - super().__init__(self._simulate_game, instrumentation) + dimension = self.game_object.play_game(self.game) * 2 # times 2 because we consider both players separately. + super().__init__(self._simulate_game, inst.var.Array(dimension)) self.instrumentation.probably_noisy = True self.instrumentation.is_nonmetrizable = game in ["war", "batawaf"] self._descriptors.update(game=game) @@ -403,15 +401,14 @@ def __init__(self, game: str = "war") -> None: def _simulate_game(self, x: np.ndarray) -> float: # FIXME: an adaptive opponent, e.g. bandit, would be better. # We play a game as player 1. - np_state = np.random.get_state() - np.random.seed(self.instrumentation.random_state.randint(12560, dtype=np.uint32)) + # np_state = np.random.get_state() # TODO TOO DANGEROUS! this can make the game play the same move all over again p1 = x[:(self.dimension // 2)] - p2 = self.instrumentation.random_state.normal(size=self.dimension // 2) + p2 = np.random.normal(size=self.dimension // 2) r = self.game_object.play_game(self.game, p1, p2) result = 0. if r == 1 else 0.5 if r == 0 else 1. # We play a game as player 2. 
- p1 = self.instrumentation.random_state.normal(size=self.dimension // 2) + p1 = np.random.normal(size=self.dimension // 2) p2 = x[(self.dimension // 2):] r = self.game_object.play_game(self.game, p1, p2) - np.random.set_state(np_state) + # np.random.set_state(np_state) return (result + (0. if r == 2 else 0.5 if r == 0 else 1.)) / 2 diff --git a/nevergrad/functions/games/test_game.py b/nevergrad/functions/games/test_game.py index 038873a58..3d981f51e 100644 --- a/nevergrad/functions/games/test_game.py +++ b/nevergrad/functions/games/test_game.py @@ -1,13 +1,13 @@ -from ...common import testing -from typing import Any -from typing import List +import typing as tp +from nevergrad.common import testing import numpy as np from . import game -@testing.parametrized(**{name: (name,) for name in ["war", "flip", "batawaf", "guesswho", "bigguesswho"]}) + +@testing.parametrized(**{name: (name,) for name in game._Game().get_list_of_games()}) def test_games(name: str) -> None: dimension = game._Game().play_game(name) - res: List[Any] = [] + res: tp.List[tp.Any] = [] for _ in range(200): res += [game._Game().play_game(name, np.random.uniform(0, 1, dimension), None)] score = (float(sum(1 if r == 2 else 0 if r == 1 else 0.5 for r in res)) / len(res)) @@ -15,6 +15,3 @@ def test_games(name: str) -> None: assert score <= 0.9 function = game.Game(name) function(function.instrumentation.random_state.normal(size=function.dimension)) - - - diff --git a/nevergrad/functions/powersystems/core.py b/nevergrad/functions/powersystems/core.py index c2894d96c..8e7b2a2eb 100644 --- a/nevergrad/functions/powersystems/core.py +++ b/nevergrad/functions/powersystems/core.py @@ -6,13 +6,11 @@ # This code is based on a code and ideas by Emmanuel Centeno and Antoine Moreau, # University Clermont Auvergne, CNRS, SIGMA Clermont, Institut Pascal +import typing as tp from math import pi, cos, sin -from typing import Any -from typing import List import matplotlib.pyplot as plt import numpy as np from ... 
import instrumentation as inst -from ...instrumentation.multivariables import Instrumentation class Agent(): @@ -23,55 +21,75 @@ def __init__(self, input_size: int, output_size: int, layers: int = 3, layer_wid assert layers >= 2 self.input_size = input_size self.output_size = output_size - self.layers: List[Any] = [] + self.layers: tp.List[tp.Any] = [] self.layers += [np.zeros((input_size, layer_width))] for _ in range(layers - 2): self.layers += [np.zeros((layer_width, layer_width))] self.layers += [np.zeros((layer_width, output_size))] assert len(self.layers) == layers - def GetParamNumbers(self) -> int: + @property + def dimension(self) -> int: return sum([np.prod(l.shape) for l in self.layers]) - def SetParams(self, ww: Any) -> None: + def set_parameters(self, ww: tp.Any) -> None: w = [w for w in ww] - assert len(w) == self.GetParamNumbers() + assert len(w) == self.dimension for i in range(len(self.layers)): s = np.prod(self.layers[i].shape) self.layers[i] = np.reshape(np.array(w[:s]), self.layers[i].shape) # TODO @oteytaud new name? w = w[s:] - def GetOutput(self, inp: Any) -> np.ndarray: + def get_output(self, inp: tp.Any) -> np.ndarray: output = np.array(inp).reshape(1, len(inp)) for l in self.layers[:-1]: output = np.tanh(np.matmul(output, l)) return np.matmul(output, self.layers[-1]) # type: ignore -# Real life is more complicated! This is a very simple model. # pylint: disable=too-many-instance-attributes,too-many-arguments,too-many-statements,too-many-locals -class PowerSystem(inst.InstrumentedFunction): - """ +class PowerSystem(inst.InstrumentedFunction): # TODO revise seeding with ParametrizedFunction + """Very simple model of a power system. + Real life is more complicated! 
+ Parameters ---------- - nint intaum_stocks: number of stocks to be managed - depth: number of layers in the neural networks - width: number of neurons per hidden layer + num_dams: int + number of dams to be managed + depth: int + number of layers in the neural networks + width: int + number of neurons per hidden layer + year_to_day_ratio: float = 2. + Ratio between std of consumption in the year and std of consumption in the day. + constant_to_year_ratio: float + Ratio between constant baseline consumption and std of consumption in the year. + back_to_normal: float + Part of the variability which is forgotten at each time step. + consumption_noise: float + Instantaneous variability. + num_thermal_plants: int + Number of thermal plants. + num_years: int + Number of years. + failure_cost: float + Cost of not satisfying the demand. Equivalent to an expensive infinite capacity thermal plant. """ - def __init__(self, num_dams: int = 13, depth: int = 3, width: int = 3, - year_to_day_ratio: float = 2., # Ratio between std of consumption in the year and std of consumption in the day. - constant_to_year_ratio: float = 1., # Ratio between constant baseline consumption and std of consumption in the year. - back_to_normal: float = 0.5, # Part of the variability which is forgotten at each time step. - consumption_noise: float = 0.1, # Instantaneous variability. - num_thermal_plants: int = 7, # Number of thermal plants. - num_years: int = 1, # Number of years. - # Cost of not satisfying the demand. Equivalent to an expensive infinite capacity thermal plant. 
+ def __init__(self, num_dams: int = 13, + depth: int = 3, + width: int = 3, + year_to_day_ratio: float = 2., + constant_to_year_ratio: float = 1., + back_to_normal: float = 0.5, + consumption_noise: float = 0.1, + num_thermal_plants: int = 7, + num_years: int = 1, failure_cost: float = 500., ) -> None: self.num_dams = num_dams - self.losses: List[float] = [] - self.marginal_costs: List[float] = [] + self.losses: tp.List[float] = [] + self.marginal_costs: tp.List[float] = [] # Parameters describing the problem. self.year_to_day_ratio = year_to_day_ratio self.constant_to_year_ratio = constant_to_year_ratio @@ -80,30 +98,30 @@ def __init__(self, num_dams: int = 13, depth: int = 3, width: int = 3, self.num_thermal_plants = num_thermal_plants self.number_of_years = num_years self.failure_cost = failure_cost - self.hydro_prod_per_time_step: List[Any] = [] # TODO @oteytaud initial values? - self.consumption_per_time_step: List[Any] = [] + self.hydro_prod_per_time_step: tp.List[tp.Any] = [] # TODO @oteytaud initial values? 
+ self.consumption_per_time_step: tp.List[tp.Any] = [] self.average_consumption = self.constant_to_year_ratio * self.year_to_day_ratio self.thermal_power_capacity = self.average_consumption * np.random.rand(self.num_thermal_plants) self.thermal_power_prices = np.random.rand(num_thermal_plants) - dam_agents: List[Any] = [] + dam_agents: tp.List[tp.Any] = [] for _ in range(num_dams): dam_agents += [Agent(10 + num_dams + 2 * self.num_thermal_plants, depth, width)] - the_dimension = sum([a.GetParamNumbers() for a in dam_agents]) + the_dimension = sum([a.dimension for a in dam_agents]) self.dam_agents = dam_agents - super().__init__(self._simulate_power_system, Instrumentation(inst.var.Array(the_dimension))) + super().__init__(self._simulate_power_system, inst.var.Array(the_dimension)) self._descriptors.update(num_dams=num_dams, depth=depth, width=width) - def get_num_vars(self) -> List[Any]: - return [m.GetParamNumbers() for m in self.dam_agents] + def get_num_vars(self) -> tp.List[tp.Any]: + return [m.dimension for m in self.dam_agents] def _simulate_power_system(self, x: np.ndarray) -> float: failure_cost = self.failure_cost # Cost of power demand which is not satisfied (equivalent to a expensive infinite thermal group). dam_agents = self.dam_agents for a in dam_agents: - assert len(x) >= a.GetParamNumbers() - a.SetParams(np.array(x[:a.GetParamNumbers()])) - x = x[a.GetParamNumbers():] + assert len(x) >= a.dimension + a.set_parameters(np.array(x[:a.dimension])) + x = x[a.dimension:] assert not x self.marginal_costs = [] @@ -116,8 +134,8 @@ def _simulate_power_system(self, x: np.ndarray) -> float: # Loop on time steps. num_time_steps = int(365 * 24 * self.number_of_years) consumption = 0. 
- hydro_prod_per_time_step: List[Any] = [] - consumption_per_time_step: List[float] = [] + hydro_prod_per_time_step: tp.List[tp.Any] = [] + consumption_per_time_step: tp.List[float] = [] for t in range(num_time_steps): # Rain @@ -144,7 +162,7 @@ def _simulate_power_system(self, x: np.ndarray) -> float: x = np.concatenate((base_x, self.thermal_power_capacity, self.thermal_power_prices, stocks)) # Prices as a decomposition tool! - price: np.ndarray = np.asarray([a.GetOutput(np.array(x))[0][0] for a in dam_agents]) + price: np.ndarray = np.asarray([a.get_output(np.array(x))[0][0] for a in dam_agents]) dam_index: np.ndarray = np.asarray(range(num_dams)) price = np.concatenate((price, self.thermal_power_prices)) capacity = np.concatenate((np.asarray(stocks), self.thermal_power_capacity)) @@ -202,22 +220,22 @@ def make_plots(self, filename: str = "ps.png") -> None: # num_time_steps = int(365 * 24 * self.number_of_years) # Utility function for plotting per year or per day. - def block(x: List[float]) -> List[float]: - result: List[float] = [] + def block(x: tp.List[float]) -> tp.List[float]: + result: tp.List[float] = [] step = int(np.sqrt(len(x))) for i in range(0, len(x), step): result += [sum(x[i:i + step]) / len(x[i:i + step])] return result - def block24(x: List[float]) -> List[float]: - result: List[float] = [] + def block24(x: tp.List[float]) -> tp.List[float]: + result: tp.List[float] = [] for i in range(0, len(x), 24): result += [sum(x[i:i + 24]) / len(x[i:i + 24])] if len(x) != len(result) * 24: print(len(x), len(result) * 24) return result - def deblock24(x: List[float]) -> List[float]: + def deblock24(x: tp.List[float]) -> tp.List[float]: result = [0.0] * 24 for i, _ in enumerate(x): result[i % 24] += x[i] / 24.