feat: use our NN heuristic

Nemo D'ACREMONT 2025-05-17 19:39:08 +02:00
parent 7b418ab5cb
commit 21392f229d
5 changed files with 172 additions and 12 deletions

View File

@@ -2,7 +2,7 @@
 import Goban
 import myPlayer
-import randomPlayer
+import gnugoPlayer
 import time
 from io import StringIO
 import sys
@@ -14,7 +14,7 @@ player1 = myPlayer.myPlayer()
 player1.newGame(Goban.Board._BLACK)
 players.append(player1)
-player2 = randomPlayer.myPlayer()
+player2 = gnugoPlayer.myPlayer()
 player2.newGame(Goban.Board._WHITE)
 players.append(player2)

View File

@@ -19,7 +19,7 @@ def _alphabeta(
     depth: int = 3,
 ) -> tuple[float, Any]:
-    wantMax = board.next_player != color
+    wantMax = (board.next_player != color)
 
     if depth == 0 or board.is_game_over():
         return heuristic(board, color), move
@@ -95,7 +95,7 @@ def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
     depth = 1
     move = -1
-    while time.time() - st < duration and depth < maxdepth:
+    while time.time() - st < duration and depth <= maxdepth:
         print("depth:", depth, time.time() - st, file=stderr)
         move = _alphabeta(
             board, heuristic, color, move=-1, alpha=-10, beta=10, depth=depth

View File

@@ -5,11 +5,155 @@ myPlayer class.
 Right now, this class contains the copy of the randomPlayer. But you have to change this!
 """
+from sys import stderr
 import time
 import Goban
 from random import choice
 from moveSearch import IDDFS, alphabeta
 from playerInterface import *
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from torch.utils.data import Dataset
+
+
+def setup_device():
+    torch.set_float32_matmul_precision("medium")
+    # Use the GPU if available, otherwise Apple MPS, otherwise fall back to the CPU
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        torch.backends.cuda.matmul.allow_tf32 = True
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
+    return device
+
+
+def goban2Go(board: Goban.Board):
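+    # Encode the position as 3 planes over the 8x8 board:
+    # plane 0 = black stones, plane 1 = white stones, plane 2 = side to move (all ones if Black plays next).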
+    goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
+    black_plays = (board.next_player() == Goban.Board._BLACK)
+    flat = board.get_board()
+    for i in range(8):
+        for j in range(8):
+            if flat[i * 8 + j] == Goban.Board._BLACK:
+                goBoard[0, i, j] = 1
+            elif flat[i * 8 + j] == Goban.Board._WHITE:
+                goBoard[1, i, j] = 1
+    goBoard[2, :, :] = 1 if black_plays else 0
+    return goBoard
+
+
+class GoModel(nn.Module):
+    def __init__(self):
+        super(GoModel, self).__init__()
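+        # Six 3x3 conv blocks (3 -> 16 -> 32 -> 64 -> 128 -> 128 -> 128 channels) with batch norm,
+        # followed by a small fully connected head ending in a sigmoid, i.e. one output in [0, 1].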
+        self.net = nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            nn.ReLU(),
+            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            nn.ReLU(),
+            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.4),
+            nn.ReLU(),
+            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            nn.ReLU(),
+            nn.Flatten(),
+            nn.Linear(128 * 8 * 8, 128),
+            nn.BatchNorm1d(128),
+            nn.ReLU(),
+            nn.Dropout(0.4),
+            nn.Linear(128, 1),
+            nn.Sigmoid(),
+        )
+
+    def forward(self, x):
+        if self.training:
+            return self.net(x)
+        else:
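+            # At inference time, average the prediction over the 4 rotations of the board
+            # (test-time augmentation).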
+            batch_size = x.size(0)
+            x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1)  # [batch_size, 4, 3, 8, 8]
+            x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size * 4, 3, 8, 8]
+            with torch.no_grad():
+                y_rotated = self.net(x_rotated)  # [batch_size * 4, 1]
+            # Reshape to group the predictions by rotation
+            y_rotated = y_rotated.view(batch_size, 4, -1)  # [batch_size, 4, 1]
+            y_mean = y_rotated.mean(dim=1)  # [batch_size, 1]
+            return y_mean
+
+
+class GoDataset(Dataset):
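+    # Training-time dataset, kept here for reference: each training position is used from both
+    # colour perspectives and augmented with the 8 board symmetries (4 rotations x optional vertical flip).
+    # `stones_to_board` is assumed to be provided by the training pipeline; it is not defined in this file.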
+    def __init__(self, data, device, test=False):
+        def label(d, j):
+            if j == 0:
+                return d["black_wins"] / d["rollouts"]
+            else:
+                return 1 - label(d, 0)
+
+        def board(d, j, k):
+            if j == 0:
+                out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0)
+            else:
+                out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1)
+            if k == 0:
+                return out
+            else:
+                return out.flipud()
+
+        if test:
+            self.boards = torch.from_numpy(np.array([
+                board(d, 0, 0) for d in data
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, 0) for d in data],
+            )).float().to(device)
+        else:
+            dims = [1, 2]
+            self.boards = torch.from_numpy(np.array([
+                torch.rot90(board(d, j, k), i, dims)
+                for d in data
+                for k in range(2)
+                for i in range(4)
+                for j in range(2)
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)],
+            )).float().to(device)
+
+    def __len__(self):
+        return len(self.boards)
+
+    def __getitem__(self, i):
+        return self.boards[i], self.labels[i]
+
+
 class myPlayer(PlayerInterface):
@@ -24,6 +168,14 @@ class myPlayer(PlayerInterface):
         self._mycolor = None
         self.moveCount = 0
+        self.device = setup_device()
+        print(self.device)
+        self.model = GoModel().to(self.device)
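+        # Load the bundled checkpoint (go_player/scrum.pt in this commit); this assumes the
+        # player is started from the directory that contains scrum.pt.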
+        checkpoint = torch.load("scrum.pt", weights_only=True)
+        self.model.load_state_dict(checkpoint["model_state_dict"])
 
     def getPlayerName(self):
         return "xXx_7h3_5cRuM_M45T3r_xXx"
@@ -35,23 +187,30 @@ class myPlayer(PlayerInterface):
             score[0] - score[1] if color == Goban.Board._BLACK else score[1] - score[0]
         )
 
+    def nnheuristic(self, board, color):
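+        # Run the network on the encoded position and use its raw output as the leaf evaluation.
+        # Note: `color` is not consulted here; the side to move is encoded in the input planes.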
+        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
+        self.model.eval()
+        with torch.no_grad():
+            prediction = self.model(go_board)
+        return prediction
+
     def getPlayerMove(self):
         if self._board.is_game_over():
             print("Referee told me to play but the game is over!")
             return "PASS"
 
-        if self.moveCount < 10:
+        if self.moveCount < 40:
             max_depth = 1
-        elif self.moveCount < 20:
-            max_depth = 2
-        elif self.moveCount < 40:
-            max_depth = 3
         else:
-            max_depth = 24
+            max_depth = 5
 
+        # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
         move = IDDFS(
-            self._board, self.simple_heuristic, self._mycolor, duration=1., maxdepth=max_depth
-        ) # IDDFS(self._board, self.simple_heuristic, self._mycolor, 1.)
+            self._board, self.nnheuristic, self._mycolor, duration=1., maxdepth=max_depth
+        )
         self._board.push(move)
 
         # New here: allows to consider internal representations of moves

View File

@@ -1 +1,2 @@
 numpy
+torch

BIN
go_player/scrum.pt Normal file

Binary file not shown.