diff --git a/go_player/localGame.py b/go_player/localGame.py index 7b5990d..1669c29 100644 --- a/go_player/localGame.py +++ b/go_player/localGame.py @@ -2,7 +2,7 @@ import Goban import myPlayer -import randomPlayer +import gnugoPlayer import time from io import StringIO import sys @@ -14,7 +14,7 @@ player1 = myPlayer.myPlayer() player1.newGame(Goban.Board._BLACK) players.append(player1) -player2 = randomPlayer.myPlayer() +player2 = gnugoPlayer.myPlayer() player2.newGame(Goban.Board._WHITE) players.append(player2) diff --git a/go_player/moveSearch.py b/go_player/moveSearch.py index c009618..5c7b4aa 100644 --- a/go_player/moveSearch.py +++ b/go_player/moveSearch.py @@ -19,7 +19,7 @@ def _alphabeta( depth: int = 3, ) -> tuple[float, Any]: - wantMax = board.next_player != color + wantMax = (board.next_player != color) if depth == 0 or board.is_game_over(): return heuristic(board, color), move @@ -95,7 +95,7 @@ def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42): depth = 1 move = -1 - while time.time() - st < duration and depth < maxdepth: + while time.time() - st < duration and depth <= maxdepth: print("depth:", depth, time.time() - st, file=stderr) move = _alphabeta( board, heuristic, color, move=-1, alpha=-10, beta=10, depth=depth diff --git a/go_player/myPlayer.py b/go_player/myPlayer.py index 7f5a40b..64ccf5d 100644 --- a/go_player/myPlayer.py +++ b/go_player/myPlayer.py @@ -5,11 +5,155 @@ myPlayer class. Right now, this class contains the copy of the randomPlayer. But you have to change this! 
""" +from sys import stderr import time import Goban from random import choice from moveSearch import IDDFS, alphabeta from playerInterface import * +import torch +import torch.nn as nn +import torch.nn.functional as F +import numpy as np +from torch.utils.data import Dataset + +def setup_device(): + torch.set_float32_matmul_precision("medium") + # Allows to use the GPU if available + if torch.cuda.is_available(): + device = torch.device("cuda") + torch.backends.cuda.matmul.allow_tf32 = True + elif torch.backends.mps.is_available(): + device = torch.device("mps") + else: + device = torch.device("cpu") + + return device + + +def goban2Go(board: Goban.Board): + goBoard = torch.zeros((3, 8, 8), dtype=torch.float32) + black_plays = (board.next_player() == Goban.Board._BLACK) + + flat = board.get_board() + for i in range(8): + for j in range(8): + if flat[i * 8 + j] == Goban.Board._BLACK: + goBoard[0, i, j] = 1 + elif flat[i * 8 + j] == Goban.Board._WHITE: + goBoard[1, i, j] = 1 + + goBoard[2,:,:] = 1 if black_plays else 0 + + return goBoard + + +class GoModel(nn.Module): + def __init__(self): + super(GoModel, self).__init__() + + self.net = torch.nn.Sequential( + nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(16), + torch.nn.ReLU(), + + nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(32), + torch.nn.ReLU(), + + nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(64), + nn.Dropout(0.4), + torch.nn.ReLU(), + + nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + + nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + + nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), + nn.BatchNorm2d(128), + torch.nn.ReLU(), + + nn.Flatten(), + + nn.Linear(128 * 8 * 8, 128), + nn.BatchNorm1d(128), + torch.nn.ReLU(), + + nn.Dropout(0.4), + nn.Linear(128, 1), + nn.Sigmoid() + ) + + def forward(self, x): + if 
self.training: + return self.net(x) + else: + y = self.net(x) + batch_size = x.size(0) + + x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1) # x_rotated: [batch_size, 4, 3, 8, 8] + x_rotated = x_rotated.view(-1, 3, 8, 8) # [batch_size*4, 3, 8, 8] + + with torch.no_grad(): + y_rotated = self.net(x_rotated) # [batch_size*4, 1] + + # Reshape to get them by rotation + y_rotated = y_rotated.view(batch_size, 4, -1) # [batch_size, 4, 1] + y_mean = y_rotated.mean(dim=1) # [batch_size, 1] + + return y_mean + + +class GoDataset(Dataset): + def __init__(self, data, device, test=False): + def label(d, j): + if j == 0: + return d["black_wins"] / d["rollouts"] + else: + return 1 - label(d, 0) + + def board(d, j, k): + if j == 0: + out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0) + else: + out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1) + + if k == 0: + return out + else: + return out.flipud() + + if test: + dims = [1, 2] + self.boards = torch.from_numpy(np.array([ + board(d, 0, 0) for d in data + ])).float().to(device) + self.labels = torch.from_numpy(np.array( + [label(d, 0) for d in data], + )).float().to(device) + else: + dims = [1, 2] + self.boards = torch.from_numpy(np.array([ + torch.rot90(board(d, j, k), i, dims) + for d in data + for k in range(2) + for i in range(4) + for j in range(2) + ])).float().to(device) + self.labels = torch.from_numpy(np.array( + [label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)], + )).float().to(device) + + def __len__(self): + return len(self.boards) + + def __getitem__(self, i): + return self.boards[i], self.labels[i] class myPlayer(PlayerInterface): @@ -24,6 +168,14 @@ class myPlayer(PlayerInterface): self._mycolor = None self.moveCount = 0 + self.device = setup_device() + print(self.device) + + self.model = GoModel().to(self.device) + + checkpoint = torch.load("scrum.pt", weights_only=True) + 
def nnheuristic(self, board, color):
    """Score ``board`` from ``color``'s point of view with the neural network.

    The board is encoded with ``goban2Go``, evaluated by ``self.model`` in
    eval mode without gradient tracking, and the single output is converted
    to a Python float so the search compares plain numbers.

    Args:
        board: the Goban board to evaluate.
        color: the colour the score is computed for.

    Returns:
        A float in ``[0, 1]``; higher is better for ``color``.
    """
    # Add the batch dimension directly instead of going through NumPy.
    go_board = goban2Go(board).unsqueeze(0).to(self.device)

    self.model.eval()
    with torch.no_grad():
        black_score = self.model(go_board).item()

    # NOTE(review): assumes the model output is black's win estimate
    # (goban2Go puts black stones in plane 0, and GoDataset labels the
    # un-swapped boards with black_wins / rollouts) -- confirm against the
    # training code.  The score is mirrored for white so that, like
    # simple_heuristic, it is always relative to ``color``.
    if color == Goban.Board._BLACK:
        return black_score
    return 1.0 - black_score