# go-ai/go_player/myPlayer.py
# -*- coding: utf-8 -*-
"""This is the file you have to modify for the tournament. Your default AI player must be called by this module, in the
myPlayer class.
Right now, this class contains the copy of the randomPlayer. But you have to change this!
"""

from sys import stderr
import time
import math
import json

import numpy as np
import torch
import torch.nn as nn

import Goban
from moveSearch import IDDFS
from playerInterface import *


def setup_device():
    """Pick the best available torch device: CUDA, then Apple MPS, then CPU."""
    if torch.cuda.is_available():
        device = torch.device("cuda")
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
        device = torch.device("cpu")
    return device


def goban2Go(board: Goban.Board):
    """
    Convert a Goban board into a (3, 8, 8) float tensor for the model:
    plane 0 marks black stones, plane 1 marks white stones, and plane 2 is
    all ones when black is to play (all zeros otherwise).
    """
    goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
    black_plays = board.next_player() == Goban.Board._BLACK
    flat = board.get_board()
    for i in range(8):
        for j in range(8):
            if flat[i * 8 + j] == Goban.Board._BLACK:
                goBoard[0, i, j] = 1
            elif flat[i * 8 + j] == Goban.Board._WHITE:
                goBoard[1, i, j] = 1
    goBoard[2, :, :] = 1 if black_plays else 0
    return goBoard
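
# Sanity check of the encoding above (illustrative): on an empty board with
# Black to move, goban2Go() yields planes 0 and 1 all zeros and plane 2 all
# ones. This layout is assumed to match the encoding used when training the
# scrum.pt checkpoint loaded below.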


class GoModel(nn.Module):
    def __init__(self):
        super(GoModel, self).__init__()
        self.net = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(0.4),
            nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, x):
        if self.training:
            return self.net(x)
        else:
            batch_size = x.size(0)
            x_rotated = torch.stack(
                [torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1
            )  # x_rotated: [batch_size, 4, 3, 8, 8]
            x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size*4, 3, 8, 8]
            with torch.no_grad():
                y_rotated = self.net(x_rotated)  # [batch_size*4, 1]
            # Reshape to group the predictions by rotation
            y_rotated = y_rotated.view(batch_size, 4, -1)  # [batch_size, 4, 1]
            y_mean = y_rotated.mean(dim=1)  # [batch_size, 1]
            return y_mean
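
# In eval mode GoModel averages its prediction over the four 90-degree
# rotations of the input board (test-time augmentation). The sigmoid output is
# a single value in [0, 1] that nnheuristic() below reads as the probability
# that Black wins, returning it as-is for Black and 1 - p for White.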


class myPlayer(PlayerInterface):
    """
    Tournament player: a convolutional win-probability network (GoModel) is
    used as the evaluation function of an iterative-deepening alpha-beta
    search (IDDFS), together with a small opening book for the first moves.
    Moves are handled in the internal flat representation returned by
    legal_moves() and accepted by push(), and translated to the GO-move
    strings "A1", ..., "J8", "PASS" when talking to the referee.
    """

    def __init__(self):
        self._board = Goban.Board()
        self._mycolor = None
        self.last_op_move = -2
        self.device = setup_device()
        self.model = GoModel().to(self.device)
        checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
        self.maxtime = 1800  # total time budget for the whole game, in seconds
        self.time = 0
        # Load the opening book. Note that _mycolor is still None at this
        # point, so the filter below keeps the games recorded as won by Black
        # ("B+...").
        self.plays = []
        with open("plays-8x8.json") as f:
            plays = json.load(f)
        l = "W" if self._mycolor == Goban.Board._WHITE else "B"
        filtered = filter(lambda t: l in t["result"], plays)
        # Turn the results into numeric margins and sort them in ascending
        # order, so the most advantageous openings come last.
        lp = l + "+"
        for el in filtered:
            el["result"] = float(el["result"].replace(lp, ""))
            self.plays.append(el)
        self.plays.sort(key=lambda t: t["result"])
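        # At this point self.plays holds the book entries ordered by margin.
        # Each entry of plays-8x8.json is assumed to look like
        # {"result": "B+12.5", "moves": ["D5", "C4", ...]} (inferred from the
        # parsing above, not checked against the file itself).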

    def getPlayerName(self):
        return "xXx_7h3_5cRuM_M45T3r_xXx"

    def nnheuristic(self, board: Goban.Board, color):
        if board.is_game_over():
            if board.winner() == board._EMPTY:
                return 0.5
            return math.inf if board.winner() == self._mycolor else -math.inf
        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
        self.model.eval()
        with torch.no_grad():
            prediction = self.model(go_board).item()
        if color == Goban.Board._BLACK:
            return prediction
        return 1 - prediction

    def getPlayerMove(self):
        st = time.time()
        if self._board.is_game_over():
            print("Referee told me to play but the game is over!")
            return "PASS"
        # Spend more search time as the game advances
        if self._board._nbBLACK + self._board._nbWHITE < 10:
            duration = 5
        elif self._board._nbBLACK + self._board._nbWHITE < 40:
            duration = 20
        else:
            duration = 30
        # Play faster if we are running out of time
        duration = min(duration, (self.maxtime - self.time) / 10)
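        # Worked example: with maxtime = 1800 s, this cap only bites once the
        # remaining budget drops below 10x the phase duration; e.g. in the
        # endgame (duration = 30) the search time starts shrinking once fewer
        # than 300 s remain, reaching 15 s when 150 s are left.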
        move = -1
        b, w = self._board.compute_score()
        # If the opponent just passed and passing wins the game, pass
        if (
            self.last_op_move == -1
            and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0
        ):
            move = -1
        # Play opening-book moves greedily early in the game
        elif self._board._nbBLACK + self._board._nbWHITE < 10:
            turn = self._board._nbBLACK + self._board._nbWHITE
            for play in self.plays:
                if (
                    len(play["moves"]) > turn
                    and Goban.Board.name_to_flat(play["moves"][turn])
                    in self._board.legal_moves()
                ):
                    move = Goban.Board.name_to_flat(play["moves"][turn])
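            # self.plays is sorted by ascending margin, so the last matching
            # entry wins: `move` ends up on the legal book move with the
            # largest recorded margin (and stays at -1, i.e. PASS, if no book
            # line matches).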
        # Otherwise, run iterative-deepening alpha-beta with the NN evaluation
        else:
            move = IDDFS(
                self._board,
                self.nnheuristic,
                self._mycolor,
                duration=duration,
                max_depth=64,
            )
        nd = time.time()
        self.time += nd - st
        print(move, (nd - st), file=stderr)
        self._board.push(move)
        return Goban.Board.flat_to_name(move)

    def playOpponentMove(self, move):
        print("Opponent played ", move)
        flat = Goban.Board.name_to_flat(move)
        self._board.push(flat)
        self.last_op_move = flat
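        # getPlayerMove treats -1 as PASS (it returns flat_to_name(-1) when it
        # decides to pass), so last_op_move == -1 is read there as "the
        # opponent just passed".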

    def newGame(self, color):
        self._board = Goban.Board()
        self._mycolor = color
        self.last_op_move = -2
        self.time = 0

    def endGame(self, winner):
        if self._mycolor == winner:
            print("I won!!!")
        else:
            print("I lost :(!!")