# go-ai/go_player/myPlayer.py
# -*- coding: utf-8 -*-
"""This is the file you have to modify for the tournament. Your default AI player must be called by this module, in the
myPlayer class.
Right now, this class contains the copy of the randomPlayer. But you have to change this!
"""

from sys import stderr
import time
import math
import json

import numpy as np
import torch
import torch.nn as nn

import Goban
from moveSearch import IDDFS
from playerInterface import *


def setup_device():
    """Pick the best available torch device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    return device


def goban2Go(board: Goban.Board):
    """
    Convert a Goban board into a (3, 8, 8) float tensor for the model:
    plane 0 marks black stones, plane 1 marks white stones, and plane 2 is
    all ones when black is to play (all zeros otherwise).
    """
    goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
    black_plays = board.next_player() == Goban.Board._BLACK
    flat = board.get_board()
    for i in range(8):
        for j in range(8):
            if flat[i * 8 + j] == Goban.Board._BLACK:
                goBoard[0, i, j] = 1
            elif flat[i * 8 + j] == Goban.Board._WHITE:
                goBoard[1, i, j] = 1
    goBoard[2, :, :] = 1 if black_plays else 0
    return goBoard
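
# Sanity check of the encoding above (illustrative): on an empty board with
# Black to move, goban2Go() yields planes 0 and 1 all zeros and plane 2 all
# ones. This layout is assumed to match the encoding used when training the
# scrum.pt checkpoint loaded below.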


class GoModel(nn.Module):
    def __init__(self):
        super(GoModel, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(0.4),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        if self.training:
            return self.net(x)
        else:
            batch_size = x.size(0)
            x_rotated = torch.stack(
                [torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1
            )  # x_rotated: [batch_size, 4, 3, 8, 8]
            x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size*4, 3, 8, 8]
            with torch.no_grad():
                y_rotated = self.net(x_rotated)  # [batch_size*4, 1]
            # Reshape to group the predictions by rotation
            y_rotated = y_rotated.view(batch_size, 4, -1)  # [batch_size, 4, 1]
            y_mean = y_rotated.mean(dim=1)  # [batch_size, 1]
            return y_mean
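
# In eval mode GoModel averages its prediction over the four 90-degree
# rotations of the input board (test-time augmentation). The sigmoid output is
# a single value in [0, 1] that nnheuristic() below reads as the probability
# that Black wins, returning it as-is for Black and 1 - p for White.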


class myPlayer(PlayerInterface):
    """
    Tournament player: a convolutional win-probability network (GoModel) is
    used as the evaluation function of an iterative-deepening alpha-beta
    search (IDDFS), together with a small opening book for the first moves.
    Moves are handled in the internal flat representation returned by
    legal_moves() and accepted by push(), and translated to the GO-move
    strings "A1", ..., "J8", "PASS" when talking to the referee.
    """

    def __init__(self):
        self._board = Goban.Board()
        self._mycolor = None
        self.last_op_move = -2
        self.device = setup_device()
        self.model = GoModel().to(self.device)
        checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
        self.maxtime = 1800  # total time budget for the whole game, in seconds
        self.time = 0
        # Load the opening book. Note that _mycolor is still None at this
        # point, so the filter below keeps the games recorded as won by Black
        # ("B+...").
        self.plays = []
        with open("plays-8x8.json") as f:
            plays = json.load(f)
        l = "W" if self._mycolor == Goban.Board._WHITE else "B"
        filtered = filter(lambda t: l in t["result"], plays)
        # Turn the results into numeric margins and sort them in ascending
        # order, so the most advantageous openings come last.
        lp = l + "+"
        for el in filtered:
            el["result"] = float(el["result"].replace(lp, ""))
            self.plays.append(el)
        self.plays.sort(key=lambda t: t["result"])
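        # At this point self.plays holds the book entries ordered by margin.
        # Each entry of plays-8x8.json is assumed to look like
        # {"result": "B+12.5", "moves": ["D5", "C4", ...]} (inferred from the
        # parsing above, not checked against the file itself).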

    def getPlayerName(self):
        return "xXx_7h3_5cRuM_M45T3r_xXx"

    def nnheuristic(self, board: Goban.Board, color):
        if board.is_game_over():
            if board.winner() == board._EMPTY:
                return 0.5
            return math.inf if board.winner() == self._mycolor else -math.inf
        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
        self.model.eval()
        with torch.no_grad():
            prediction = self.model(go_board).item()
        if color == Goban.Board._BLACK:
            return prediction
        return 1 - prediction

    def getPlayerMove(self):
        st = time.time()
        if self._board.is_game_over():
            print("Referee told me to play but the game is over!")
            return "PASS"
        # Spend more search time as the game advances
        if self._board._nbBLACK + self._board._nbWHITE < 10:
            duration = 5
        elif self._board._nbBLACK + self._board._nbWHITE < 40:
            duration = 20
        else:
            duration = 30
        # Play faster if we are running out of time
        duration = min(duration, (self.maxtime - self.time) / 10)
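        # Worked example: with maxtime = 1800 s, this cap only bites once the
        # remaining budget drops below 10x the phase duration; e.g. in the
        # endgame (duration = 30) the search time starts shrinking once fewer
        # than 300 s remain, reaching 15 s when 150 s are left.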
        move = -1
        b, w = self._board.compute_score()
        # If the opponent just passed and passing wins the game, pass
        if (
            self.last_op_move == -1
            and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0
        ):
            move = -1
        # Play opening-book moves greedily early in the game
        elif self._board._nbBLACK + self._board._nbWHITE < 10:
            turn = self._board._nbBLACK + self._board._nbWHITE
            for play in self.plays:
                if (
                    len(play["moves"]) > turn
                    and Goban.Board.name_to_flat(play["moves"][turn])
                    in self._board.legal_moves()
                ):
                    move = Goban.Board.name_to_flat(play["moves"][turn])
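            # self.plays is sorted by ascending margin, so the last matching
            # entry wins: `move` ends up on the legal book move with the
            # largest recorded margin (and stays at -1, i.e. PASS, if no book
            # line matches).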
        # Otherwise, run iterative-deepening alpha-beta with the NN evaluation
        else:
            move = IDDFS(
                self._board,
                self.nnheuristic,
                self._mycolor,
                duration=duration,
                max_depth=64,
            )
        nd = time.time()
        self.time += nd - st
        print(move, (nd - st), file=stderr)
        self._board.push(move)
        return Goban.Board.flat_to_name(move)

    def playOpponentMove(self, move):
        print("Opponent played ", move)
        flat = Goban.Board.name_to_flat(move)
        self._board.push(flat)
        self.last_op_move = flat
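        # getPlayerMove treats -1 as PASS (it returns flat_to_name(-1) when it
        # decides to pass), so last_op_move == -1 is read there as "the
        # opponent just passed".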

    def newGame(self, color):
        self._board = Goban.Board()
        self._mycolor = color
        self.last_op_move = -2
        self.time = 0

    def endGame(self, winner):
        if self._mycolor == winner:
            print("I won!!!")
        else:
            print("I lost :(!!")