diff --git a/go_player/moveSearch.py b/go_player/moveSearch.py index d35bc5e..b5dcbdc 100644 --- a/go_player/moveSearch.py +++ b/go_player/moveSearch.py @@ -4,6 +4,7 @@ import math from typing import Any, Callable import Goban + def _alphabeta( board: Goban.Board, heuristic: Callable[[Goban.Board, Any], float], @@ -11,12 +12,12 @@ def _alphabeta( alpha=-math.inf, beta=math.inf, depth: int = 3, - shouldStop=lambda: False + shouldStop=lambda: False, ) -> tuple[float, Any]: if board.is_game_over() or depth == 0: return heuristic(board, color), None - wantMax = (board.next_player() == color) + wantMax = board.next_player() == color best_move = None if wantMax: @@ -33,7 +34,7 @@ def _alphabeta( alpha=alpha, beta=beta, depth=depth - 1, - shouldStop=shouldStop + shouldStop=shouldStop, )[0] board.pop() @@ -59,7 +60,7 @@ def _alphabeta( alpha=alpha, beta=beta, depth=depth - 1, - shouldStop=shouldStop + shouldStop=shouldStop, )[0] board.pop() @@ -73,6 +74,7 @@ def _alphabeta( return acc, best_move + def alphabeta( board: Goban.Board, heuristic: Callable[[Goban.Board, Any], float], @@ -88,7 +90,7 @@ def IDDFS( heuristic: Callable[[Goban.Board, Any], float], color, max_depth: int = 10, - duration: float = 5.0 # Duration in seconds + duration: float = 5.0, # Duration in seconds ): best_move = None start_time = time.time() @@ -97,11 +99,7 @@ def IDDFS( for depth in range(1, max_depth + 1): value, move = _alphabeta( - board, - heuristic=heuristic, - color=color, - depth=depth, - shouldStop=shouldStop + board, heuristic=heuristic, color=color, depth=depth, shouldStop=shouldStop ) if shouldStop(): diff --git a/go_player/myPlayer.py b/go_player/myPlayer.py index 5368da2..8dadb54 100644 --- a/go_player/myPlayer.py +++ b/go_player/myPlayer.py @@ -19,6 +19,7 @@ import numpy as np from torch.utils.data import Dataset import json + def setup_device(): # Allows to use the GPU if available if torch.cuda.is_available(): @@ -36,7 +37,7 @@ def goban2Go(board: Goban.Board): Convert a goban board to a tensor for the model """ goBoard = torch.zeros((3, 8, 8), dtype=torch.float32) - black_plays = (board.next_player() == Goban.Board._BLACK) + black_plays = board.next_player() == Goban.Board._BLACK flat = board.get_board() for i in range(8): @@ -46,7 +47,7 @@ def goban2Go(board: Goban.Board): elif flat[i * 8 + j] == Goban.Board._WHITE: goBoard[1, i, j] = 1 - goBoard[2,:,:] = 1 if black_plays else 0 + goBoard[2, :, :] = 1 if black_plays else 0 return goBoard @@ -56,50 +57,44 @@ class GoModel(nn.Module): super(GoModel, self).__init__() self.net = torch.nn.Sequential( - nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(16), - torch.nn.ReLU(), - - nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(32), - torch.nn.ReLU(), - - nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(64), - nn.Dropout(0.4), - torch.nn.ReLU(), - - nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(128), - torch.nn.ReLU(), - - nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(128), - torch.nn.ReLU(), - - nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), - nn.BatchNorm2d(128), - torch.nn.ReLU(), - - nn.Flatten(), - - nn.Linear(128 * 8 * 8, 128), - nn.BatchNorm1d(128), - torch.nn.ReLU(), - - nn.Dropout(0.4), - nn.Linear(128, 1), - nn.Sigmoid() + nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(16), + torch.nn.ReLU(), + nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(32), + torch.nn.ReLU(), + nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(64), + nn.Dropout(0.4), + torch.nn.ReLU(), + nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + nn.Flatten(), + nn.Linear(128 * 8 * 8, 128), + nn.BatchNorm1d(128), + torch.nn.ReLU(), + nn.Dropout(0.4), + nn.Linear(128, 1), + nn.Sigmoid(), ) def forward(self, x): - if self.training: + if self.training: return self.net(x) else: y = self.net(x) batch_size = x.size(0) - x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1) # x_rotated: [batch_size, 4, 3, 8, 8] + x_rotated = torch.stack( + [torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1 + ) # x_rotated: [batch_size, 4, 3, 8, 8] x_rotated = x_rotated.view(-1, 3, 8, 8) # [batch_size*4, 3, 8, 8] with torch.no_grad(): @@ -107,7 +102,7 @@ class GoModel(nn.Module): # Reshape to get them by rotation y_rotated = y_rotated.view(batch_size, 4, -1) # [batch_size, 4, 1] - y_mean = y_rotated.mean(dim=1) # [batch_size, 1] + y_mean = y_rotated.mean(dim=1) # [batch_size, 1] return y_mean @@ -193,24 +188,35 @@ class myPlayer(PlayerInterface): b, w = self._board.compute_score() # If passing wins the game, pass - if self.last_op_move == -1 and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0: + if ( + self.last_op_move == -1 + and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0 + ): move = -1 # Play greedily opening moves early in the game elif self._board._nbBLACK + self._board._nbWHITE < 10: turn = self._board._nbBLACK + self._board._nbWHITE for play in self.plays: - if len(play["moves"]) > turn and Goban.Board.name_to_flat(play["moves"][turn]) in self._board.legal_moves(): + if ( + len(play["moves"]) > turn + and Goban.Board.name_to_flat(play["moves"][turn]) + in self._board.legal_moves() + ): move = Goban.Board.name_to_flat(play["moves"][turn]) # Use iddfs alphabeta else: move = IDDFS( - self._board, self.nnheuristic, self._mycolor, duration=duration, max_depth=64 + self._board, + self.nnheuristic, + self._mycolor, + duration=duration, + max_depth=64, ) nd = time.time() - self.time += (nd - st) + self.time += nd - st print(move, (nd - st), file=stderr)