feat: use our NN heuristic
commit 21392f229d
parent 7b418ab5cb
@@ -2,7 +2,7 @@

 import Goban
 import myPlayer
-import randomPlayer
+import gnugoPlayer
 import time
 from io import StringIO
 import sys
@@ -14,7 +14,7 @@ player1 = myPlayer.myPlayer()
 player1.newGame(Goban.Board._BLACK)
 players.append(player1)

-player2 = randomPlayer.myPlayer()
+player2 = gnugoPlayer.myPlayer()
 player2.newGame(Goban.Board._WHITE)
 players.append(player2)

@@ -19,7 +19,7 @@ def _alphabeta(
     depth: int = 3,
 ) -> tuple[float, Any]:

-    wantMax = board.next_player != color
+    wantMax = (board.next_player != color)
     if depth == 0 or board.is_game_over():
         return heuristic(board, color), move

@@ -95,7 +95,7 @@ def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
    depth = 1
    move = -1

-   while time.time() - st < duration and depth < maxdepth:
+   while time.time() - st < duration and depth <= maxdepth:
        print("depth:", depth, time.time() - st, file=stderr)
        move = _alphabeta(
            board, heuristic, color, move=-1, alpha=-10, beta=10, depth=depth
@@ -5,11 +5,155 @@ myPlayer class.
 Right now, this class contains the copy of the randomPlayer. But you have to change this!
 """

+from sys import stderr
 import time
 import Goban
 from random import choice
 from moveSearch import IDDFS, alphabeta
 from playerInterface import *
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+from torch.utils.data import Dataset
+
+def setup_device():
+    torch.set_float32_matmul_precision("medium")
+    # Allows to use the GPU if available
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+        torch.backends.cuda.matmul.allow_tf32 = True
+    elif torch.backends.mps.is_available():
+        device = torch.device("mps")
+    else:
+        device = torch.device("cpu")
+
+    return device
+
+
+def goban2Go(board: Goban.Board):
+    goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
+    black_plays = (board.next_player() == Goban.Board._BLACK)
+
+    flat = board.get_board()
+    for i in range(8):
+        for j in range(8):
+            if flat[i * 8 + j] == Goban.Board._BLACK:
+                goBoard[0, i, j] = 1
+            elif flat[i * 8 + j] == Goban.Board._WHITE:
+                goBoard[1, i, j] = 1
+
+    goBoard[2,:,:] = 1 if black_plays else 0
+
+    return goBoard
+
+
+class GoModel(nn.Module):
+    def __init__(self):
+        super(GoModel, self).__init__()
+
+        self.net = torch.nn.Sequential(
+            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(16),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(32),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(64),
+            nn.Dropout(0.4),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(128),
+            torch.nn.ReLU(),
+
+            nn.Flatten(),
+
+            nn.Linear(128 * 8 * 8, 128),
+            nn.BatchNorm1d(128),
+            torch.nn.ReLU(),
+
+            nn.Dropout(0.4),
+            nn.Linear(128, 1),
+            nn.Sigmoid()
+        )
+
+    def forward(self, x):
+        if self.training:
+            return self.net(x)
+        else:
+            y = self.net(x)
+            batch_size = x.size(0)
+
+            x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1)  # x_rotated: [batch_size, 4, 3, 8, 8]
+            x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size*4, 3, 8, 8]
+
+            with torch.no_grad():
+                y_rotated = self.net(x_rotated)  # [batch_size*4, 1]
+
+            # Reshape to get them by rotation
+            y_rotated = y_rotated.view(batch_size, 4, -1)  # [batch_size, 4, 1]
+            y_mean = y_rotated.mean(dim=1)  # [batch_size, 1]
+
+            return y_mean
+
+
+class GoDataset(Dataset):
+    def __init__(self, data, device, test=False):
+        def label(d, j):
+            if j == 0:
+                return d["black_wins"] / d["rollouts"]
+            else:
+                return 1 - label(d, 0)
+
+        def board(d, j, k):
+            if j == 0:
+                out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0)
+            else:
+                out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1)
+
+            if k == 0:
+                return out
+            else:
+                return out.flipud()
+
+        if test:
+            dims = [1, 2]
+            self.boards = torch.from_numpy(np.array([
+                board(d, 0, 0) for d in data
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, 0) for d in data],
+            )).float().to(device)
+        else:
+            dims = [1, 2]
+            self.boards = torch.from_numpy(np.array([
+                torch.rot90(board(d, j, k), i, dims)
+                for d in data
+                for k in range(2)
+                for i in range(4)
+                for j in range(2)
+            ])).float().to(device)
+            self.labels = torch.from_numpy(np.array(
+                [label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)],
+            )).float().to(device)
+
+    def __len__(self):
+        return len(self.boards)
+
+    def __getitem__(self, i):
+        return self.boards[i], self.labels[i]
+
+
 class myPlayer(PlayerInterface):
@@ -24,6 +168,14 @@ class myPlayer(PlayerInterface):
         self._mycolor = None
         self.moveCount = 0

+        self.device = setup_device()
+        print(self.device)
+
+        self.model = GoModel().to(self.device)
+
+        checkpoint = torch.load("scrum.pt", weights_only=True)
+        self.model.load_state_dict(checkpoint["model_state_dict"])
+
     def getPlayerName(self):
         return "xXx_7h3_5cRuM_M45T3r_xXx"

@@ -35,23 +187,30 @@ class myPlayer(PlayerInterface):
             score[0] - score[1] if color == Goban.Board._BLACK else score[1] - score[0]
         )

+    def nnheuristic(self, board, color):
+        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
+
+        self.model.eval()
+        with torch.no_grad():
+            prediction = self.model(go_board)
+
+        return prediction
+
     def getPlayerMove(self):
         if self._board.is_game_over():
            print("Referee told me to play but the game is over!")
            return "PASS"

-        if self.moveCount < 10:
+        if self.moveCount < 40:
            max_depth = 1
-        elif self.moveCount < 20:
-           max_depth = 2
-        elif self.moveCount < 40:
-           max_depth = 3
        else:
-           max_depth = 24
+           max_depth = 5
+
+       # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
        move = IDDFS(
-           self._board, self.simple_heuristic, self._mycolor, duration=1., maxdepth=max_depth
-       ) # IDDFS(self._board, self.simple_heuristic, self._mycolor, 1.)
+           self._board, self.nnheuristic, self._mycolor, duration=1., maxdepth=max_depth
+       )

        self._board.push(move)

        # New here: allows to consider internal representations of moves
@@ -1 +1,2 @@
 numpy
+torch
BIN go_player/scrum.pt (new file)
Binary file not shown.