feat: use our NN heuristic
commit 21392f229d
parent 7b418ab5cb
@@ -2,7 +2,7 @@

 import Goban
 import myPlayer
-import randomPlayer
+import gnugoPlayer
 import time
 from io import StringIO
 import sys
@@ -14,7 +14,7 @@ player1 = myPlayer.myPlayer()
 player1.newGame(Goban.Board._BLACK)
 players.append(player1)

-player2 = randomPlayer.myPlayer()
+player2 = gnugoPlayer.myPlayer()
 player2.newGame(Goban.Board._WHITE)
 players.append(player2)

@@ -19,7 +19,7 @@ def _alphabeta(
     depth: int = 3,
 ) -> tuple[float, Any]:

-    wantMax = board.next_player != color
+    wantMax = (board.next_player != color)
     if depth == 0 or board.is_game_over():
         return heuristic(board, color), move

@@ -95,7 +95,7 @@ def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
    depth = 1
    move = -1

-   while time.time() - st < duration and depth < maxdepth:
+   while time.time() - st < duration and depth <= maxdepth:
        print("depth:", depth, time.time() - st, file=stderr)
        move = _alphabeta(
            board, heuristic, color, move=-1, alpha=-10, beta=10, depth=depth
@@ -5,11 +5,155 @@ myPlayer class.
 Right now, this class contains the copy of the randomPlayer. But you have to change this!
 """

+from sys import stderr
 import time
 import Goban
 from random import choice
 from moveSearch import IDDFS, alphabeta
 from playerInterface import *
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from torch.utils.data import Dataset
+
+def setup_device():
+    torch.set_float32_matmul_precision("medium")
+    # Allows to use the GPU if available
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        torch.backends.cuda.matmul.allow_tf32 = True
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
+
+    return device
+
+
+def goban2Go(board: Goban.Board):
+    goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
+    black_plays = (board.next_player() == Goban.Board._BLACK)
+
+    flat = board.get_board()
+    for i in range(8):
+        for j in range(8):
+            if flat[i * 8 + j] == Goban.Board._BLACK:
+                goBoard[0, i, j] = 1
+            elif flat[i * 8 + j] == Goban.Board._WHITE:
+                goBoard[1, i, j] = 1
+
+    goBoard[2,:,:] = 1 if black_plays else 0
+
+    return goBoard
+
+
+class GoModel(nn.Module):
+    def __init__(self):
+        super(GoModel, self).__init__()
+
+        self.net = torch.nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.4),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Flatten(),
+
+            nn.Linear(128 * 8 * 8, 128),
+            nn.BatchNorm1d(128),
+            torch.nn.ReLU(),
+
+            nn.Dropout(0.4),
+            nn.Linear(128, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        if self.training:
+            return self.net(x)
+        else:
+            y = self.net(x)
+            batch_size = x.size(0)
+
+            x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1)  # x_rotated: [batch_size, 4, 3, 8, 8]
+            x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size*4, 3, 8, 8]
+
+            with torch.no_grad():
+                y_rotated = self.net(x_rotated)  # [batch_size*4, 1]
+
+            # Reshape to get them by rotation
+            y_rotated = y_rotated.view(batch_size, 4, -1)  # [batch_size, 4, 1]
+            y_mean = y_rotated.mean(dim=1)  # [batch_size, 1]
+
+            return y_mean
+
+
+class GoDataset(Dataset):
+    def __init__(self, data, device, test=False):
+        def label(d, j):
+            if j == 0:
+                return d["black_wins"] / d["rollouts"]
+            else:
+                return 1 - label(d, 0)
+
+        def board(d, j, k):
+            if j == 0:
+                out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0)
+            else:
+                out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1)
+
+            if k == 0:
+                return out
+            else:
+                return out.flipud()
+
+        if test:
+            dims = [1, 2]
+            self.boards = torch.from_numpy(np.array([
+                board(d, 0, 0) for d in data
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, 0) for d in data],
+            )).float().to(device)
+        else:
+            dims = [1, 2]
+            self.boards = torch.from_numpy(np.array([
+                torch.rot90(board(d, j, k), i, dims)
+                for d in data
+                for k in range(2)
+                for i in range(4)
+                for j in range(2)
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)],
+            )).float().to(device)
+
+    def __len__(self):
+        return len(self.boards)
+
+    def __getitem__(self, i):
+        return self.boards[i], self.labels[i]
+
+
 class myPlayer(PlayerInterface):
@@ -24,6 +168,14 @@ class myPlayer(PlayerInterface):
         self._mycolor = None
         self.moveCount = 0

+        self.device = setup_device()
+        print(self.device)
+
+        self.model = GoModel().to(self.device)
+
+        checkpoint = torch.load("scrum.pt", weights_only=True)
+        self.model.load_state_dict(checkpoint["model_state_dict"])
+
     def getPlayerName(self):
         return "xXx_7h3_5cRuM_M45T3r_xXx"

@@ -35,23 +187,30 @@ class myPlayer(PlayerInterface):
             score[0] - score[1] if color == Goban.Board._BLACK else score[1] - score[0]
         )

+    def nnheuristic(self, board, color):
+        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
+
+        self.model.eval()
+        with torch.no_grad():
+            prediction = self.model(go_board)
+
+        return prediction
+
     def getPlayerMove(self):
         if self._board.is_game_over():
            print("Referee told me to play but the game is over!")
            return "PASS"

-        if self.moveCount < 10:
+        if self.moveCount < 40:
            max_depth = 1
-        elif self.moveCount < 20:
-           max_depth = 2
-        elif self.moveCount < 40:
-           max_depth = 3
        else:
-           max_depth = 24
+           max_depth = 5
+
+       # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
        move = IDDFS(
-           self._board, self.simple_heuristic, self._mycolor, duration=1., maxdepth=max_depth
-       ) # IDDFS(self._board, self.simple_heuristic, self._mycolor, 1.)
+           self._board, self.nnheuristic, self._mycolor, duration=1., maxdepth=max_depth
+       )

        self._board.push(move)

        # New here: allows to consider internal representations of moves
@@ -1 +1,2 @@
 numpy
+torch
BIN go_player/scrum.pt (new file)
Binary file not shown.