Compare commits

..

No commits in common. "fb8250ff04c1a52bd26dd497af46c2fb9c67e264" and "96b38152803d14cf565f4c9abb5e4385ca63a1e1" have entirely different histories.

7 changed files with 153 additions and 223 deletions

View File

@@ -4,5 +4,3 @@ __pycache__
 /go-package.tgz
 *.ipynb
 /chess
-/tp_player-ndacremont_meyben
-/tp_player-ndacremont_meyben.tar.gz

View File

@@ -1,27 +0,0 @@
-files += tp_player-ndacremont_meyben/README.md
-files += tp_player-ndacremont_meyben/scrum.pt
-files += tp_player-ndacremont_meyben/plays-8x8.json
-files += tp_player-ndacremont_meyben/localGame.py
-files += tp_player-ndacremont_meyben/namedGame.py
-files += tp_player-ndacremont_meyben/Goban.py
-files += tp_player-ndacremont_meyben/myPlayer.py
-files += tp_player-ndacremont_meyben/moveSearch.py
-files += tp_player-ndacremont_meyben/playerInterface.py
-files += tp_player-ndacremont_meyben/requirements.txt
-
-.PHONY += all
-all: tp_player-ndacremont_meyben.tar.gz
-
-tp_player-ndacremont_meyben.tar.gz: $(files)
-	tar -cvzf $@ $^
-
-tp_player-ndacremont_meyben/%: %
-	@mkdir -p $(dir $@)
-	cp $^ $@
-
-.PHONY += clean
-clean:
-	$(RM) -r tp_player-ndacremont_meyben tp_player-ndacremont_meyben.tar.gz
-
-.PHONY: $(PHONY)

View File

@@ -1,31 +0,0 @@
-# Graded Go-player lab (TP) -- Nemo D'ACREMONT, Martin EYBEN, G1
-
-## Files required to run the player
-
-The following files are required to run the player:
-
-* myPlayer.py
-* moveSearch.py
-* playerInterface.py
-* scrum.pt
-* plays-8x8.json
-
-## Required libraries
-
-These libraries are listed in the `requirements.txt` file and are the
-following:
-
-* PyTorch
-* Numpy
-
-## Techniques used
-
-* IDDFS with alpha-beta
-* Stops computing the move inside the alpha-beta search when the allotted
-  time is exceeded
-* Plays standard moves without the heuristic while there are few (<10) stones
-  on the board
-* Passes if the opponent has just passed and we are currently winning
-* The more stones have been played, the more time is allotted per move, unless
-  we are close to the 30-minute limit, in which case the player moves quickly
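
The "Techniques used" list above (from the removed README) is the clearest description of the engine's search strategy: iterative-deepening depth-first search (IDDFS) over alpha-beta, cut off by a per-move time budget. As a rough, standalone illustration of that idea only — the board interface (`legal_moves`, `push`, `pop`, `is_game_over`) and `evaluate` below are hypothetical stand-ins, not the project's Goban API:

```python
# Minimal sketch of time-bounded iterative deepening over alpha-beta.
# The board object is a hypothetical stand-in for the project's Goban.Board.
import math
import time


def alphabeta(game, depth, alpha, beta, maximizing, evaluate, should_stop):
    """Plain alpha-beta; returns (score, move) for the side to move."""
    if depth == 0 or game.is_game_over():
        return evaluate(game), None
    best_move = None
    if maximizing:
        best = -math.inf
        for move in game.legal_moves():
            game.push(move)
            score, _ = alphabeta(game, depth - 1, alpha, beta, False, evaluate, should_stop)
            game.pop()
            if score > best:
                best, best_move = score, move
            alpha = max(alpha, best)
            if should_stop() or alpha >= beta:
                break  # beta cutoff, or out of time
        return best, best_move
    else:
        best = math.inf
        for move in game.legal_moves():
            game.push(move)
            score, _ = alphabeta(game, depth - 1, alpha, beta, True, evaluate, should_stop)
            game.pop()
            if score < best:
                best, best_move = score, move
            beta = min(beta, best)
            if should_stop() or beta <= alpha:
                break  # alpha cutoff, or out of time
        return best, best_move


def iddfs(game, evaluate, duration, max_depth=10):
    """Deepen one ply at a time; keep the move from the last fully completed depth."""
    deadline = time.time() + duration
    should_stop = lambda: time.time() >= deadline
    best_move = None
    for depth in range(1, max_depth + 1):
        score, move = alphabeta(game, depth, -math.inf, math.inf, True, evaluate, should_stop)
        if should_stop():
            break  # this depth was interrupted, so keep the previous result
        best_move = move
    return best_move
```

Keeping only fully completed depths matters because an interrupted search may have examined an unrepresentative subset of moves, which is why both versions of `IDDFS` in the moveSearch.py diff further down thread a `shouldStop` deadline into the alpha-beta recursion.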

View File

@@ -2,6 +2,7 @@
 import Goban
 import myPlayer
+import gnugoPlayer
 import time
 from io import StringIO
 import sys
@@ -13,7 +14,7 @@ player1 = myPlayer.myPlayer()
 player1.newGame(Goban.Board._BLACK)
 players.append(player1)
-player2 = myPlayer.myPlayer()
+player2 = gnugoPlayer.myPlayer()
 player2.newGame(Goban.Board._WHITE)
 players.append(player2)

View File

@@ -5,74 +5,83 @@ from typing import Any, Callable
 import Goban


-# Returns heuristic, move
 def _alphabeta(
     board: Goban.Board,
     heuristic: Callable[[Goban.Board, Any], float],
     color,
+    move,
     alpha=-math.inf,
     beta=math.inf,
     depth: int = 3,
-    shouldStop=lambda: False,
+    shouldStop = lambda: False
 ) -> tuple[float, Any]:
-    if board.is_game_over() or depth == 0:
-        return heuristic(board, color), None
-    wantMax = (board.next_player() == color)
-    best_move = -1
+    wantMax = (board.next_player == color)
+    if depth == 0 or board.is_game_over():
+        return heuristic(board, board.next_player()), move
     if wantMax:
-        acc = -math.inf
+        acc = -math.inf, None
         for move in board.generate_legal_moves():
             if Goban.Board.flat_to_name(move) == "PASS":
                 continue
             board.push(move)
-            value = _alphabeta(
-                board,
-                alpha=alpha,
-                beta=beta,
-                heuristic=heuristic,
-                color=color,
-                depth=depth - 1,
-                shouldStop=shouldStop,
-            )[0]
+            value = (
+                _alphabeta(
+                    board,
+                    heuristic=heuristic,
+                    color=color,
+                    alpha=alpha,
+                    beta=beta,
+                    move=move,
+                    depth=depth - 1,
+                )[0],
+                move,
+            )
+            acc = max(
+                acc,
+                value,
+                key=lambda t: t[0],
+            )
             board.pop()
-            if value > acc:
-                acc = value
-                best_move = move
-            alpha = max(alpha, acc)
-            if shouldStop() or acc >= beta:
+            if shouldStop() or acc[0] >= beta:
                 break # beta cutoff
+            alpha = max(alpha, acc[0])
     else:
-        acc = math.inf
+        acc = math.inf, None
         for move in board.generate_legal_moves():
             if Goban.Board.flat_to_name(move) == "PASS":
                 continue
             board.push(move)
-            value = _alphabeta(
-                board,
-                alpha=alpha,
-                beta=beta,
-                heuristic=heuristic,
-                color=color,
-                depth=depth - 1,
-                shouldStop=shouldStop,
-            )[0]
+            value = (
+                _alphabeta(
+                    board,
+                    heuristic=heuristic,
+                    color=color,
+                    alpha=alpha,
+                    beta=beta,
+                    move=move,
+                    depth=depth - 1,
+                )[0],
+                move,
+            )
+            acc = min(
+                acc,
+                value,
+                key=lambda t: t[0],
+            )
             board.pop()
-            if value < acc:
-                acc = value
-                best_move = move
-            beta = min(beta, acc)
-            if shouldStop() or acc <= alpha:
+            if shouldStop() or acc[0] <= alpha:
                 break # alpha cutoff
+            beta = min(beta, acc[0])

-    return acc, best_move
+    return acc


 def alphabeta(
@@ -81,30 +90,36 @@ def alphabeta(
     color,
     depth: int = 3,
 ):
-    _, move = _alphabeta(board, heuristic=heuristic, color=color, depth=depth)
+    _, move = _alphabeta(board, move=-1, heuristic=heuristic, color=color, depth=depth)
     return move


-def IDDFS(
-    board: Goban.Board,
-    heuristic: Callable[[Goban.Board, Any], float],
-    color,
-    max_depth: int = 10,
-    duration: float = 5.0,  # Duration in seconds
-):
-    best_move = -1
-    start_time = time.time()
-    shouldStop = lambda: (time.time() - start_time) >= duration
-    for depth in range(1, max_depth + 1):
-        value, move = _alphabeta(
-            board, heuristic=heuristic, color=color, depth=depth, shouldStop=shouldStop
-        )
-        if shouldStop():
-            break
-        print(f"{depth}, {value}", file=stderr)
-        best_move = move
-    return best_move
+def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
+    st = time.time()
+    shouldStop = (lambda: time.time() - st > duration)
+    depth = 0
+    move = -1
+    score = -1
+    while not shouldStop() and depth <= maxdepth:
+        if depth % 2 == 0:
+            score, move = _alphabeta(
+                board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
+            )
+            if score == math.inf:
+                return move, score
+        else:
+            score, move = _alphabeta(
+                board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
+            )
+            if score == -math.inf:
+                return move, score
+        print("depth:", depth, time.time() - st, score, file=stderr)
+        depth += 1
+    print(time.time() - st, duration, depth, file=stderr)
+    return move, score
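
One noticeable difference between the two versions above is that the right-hand `_alphabeta` carries the best line as a `(score, move)` tuple and compares candidates with `max`/`min` keyed on the score, instead of tracking a scalar `acc` plus a separate `best_move`. A tiny, self-contained sketch of that comparison pattern (the move names below are illustrative, not the project's):

```python
import math

# Track (score, move) pairs and compare on the score component only.
candidates = [(0.25, "D4"), (0.75, "C3"), (0.40, "E5")]

best = (-math.inf, None)
for score, move in candidates:
    best = max(best, (score, move), key=lambda t: t[0])

worst = (math.inf, None)
for score, move in candidates:
    worst = min(worst, (score, move), key=lambda t: t[0])

print(best)   # (0.75, 'C3')
print(worst)  # (0.25, 'D4')
```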

View File

@@ -9,14 +9,14 @@ from sys import stderr
 import time
 import math
 import Goban
-from moveSearch import IDDFS
+from random import choice
+from moveSearch import IDDFS, alphabeta
 from playerInterface import *
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 import numpy as np
-import json
+from torch.utils.data import Dataset


 def setup_device():
     # Allows to use the GPU if available
@@ -31,11 +31,8 @@ def setup_device():

 def goban2Go(board: Goban.Board):
-    """
-    Convert a goban board to a tensor for the model
-    """
     goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
-    black_plays = board.next_player() == Goban.Board._BLACK
+    black_plays = (board.next_player() == Goban.Board._BLACK)
     flat = board.get_board()
     for i in range(8):
@@ -45,10 +42,9 @@ def goban2Go(board: Goban.Board):
            elif flat[i * 8 + j] == Goban.Board._WHITE:
                goBoard[1, i, j] = 1

-    goBoard[2, :, :] = 1 if black_plays else 0
-    # sometime, a little bit of magic is required
-    return torch.from_numpy(np.array([goBoard])).float()
+    goBoard[2,:,:] = 1 if black_plays else 0
+    return goBoard


 class GoModel(nn.Module):
@@ -59,29 +55,37 @@ class GoModel(nn.Module):
            nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(16),
            torch.nn.ReLU(),
            nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(32),
            torch.nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.Dropout(0.4),
            torch.nn.ReLU(),
            nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(128),
            torch.nn.ReLU(),
            nn.Flatten(),
            nn.Linear(128 * 8 * 8, 128),
            nn.BatchNorm1d(128),
            torch.nn.ReLU(),
            nn.Dropout(0.4),
            nn.Linear(128, 1),
-            nn.Sigmoid(),
+            nn.Sigmoid()
        )

    def forward(self, x):
@@ -91,9 +95,7 @@ class GoModel(nn.Module):
        y = self.net(x)

        batch_size = x.size(0)
-        x_rotated = torch.stack(
-            [torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1
-        )  # x_rotated: [batch_size, 4, 3, 8, 8]
+        x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1) # x_rotated: [batch_size, 4, 3, 8, 8]
        x_rotated = x_rotated.view(-1, 3, 8, 8)  # [batch_size*4, 3, 8, 8]

        with torch.no_grad():
@@ -116,44 +118,38 @@ class myPlayer(PlayerInterface):

    def __init__(self):
        self._board = Goban.Board()
        self._mycolor = None
-        self.last_op_move = -2
        self.device = setup_device()
+        print(self.device)
        self.model = GoModel().to(self.device)
        checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
+        self.last_op_move = None
        self.maxtime = 1800
        self.time = 0
-        # Load plays for the opening
-        self.plays = []
-        with open("plays-8x8.json") as f:
-            plays = json.load(f)
-            # Only keep the plays we want
-            l = "W" if self._mycolor == Goban.Board._WHITE else "B"
-            filtered = filter(lambda t: l in t["result"], plays)
-            # We sort to take the most advantageous openings
-            lp = l + "+"
-            for el in filtered:
-                el["result"] = float(el["result"].replace(lp, ""))
-                self.plays.append(el)
-            self.plays.sort(key=lambda t: t["result"])

    def getPlayerName(self):
        return "xXx_7h3_5cRuM_M45T3r_xXx"

+    @staticmethod
+    def simple_heuristic(board, color):
+        # Simple stone difference heuristic
+        score = board.compute_score()
+        return (
+            score[0] - score[1] if color == Goban.Board._BLACK else score[1] - score[0]
+        )
+
    def nnheuristic(self, board: Goban.Board, color):
        if board.is_game_over():
            if board.winner() == board._EMPTY:
                return 0.5
-            return math.inf if board.winner() == self._mycolor else -math.inf
-        go_board = goban2Go(board).to(self.device)
+            return math.inf if board.winner() == color else -math.inf
+        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
        self.model.eval()
        with torch.no_grad():
@@ -170,68 +166,47 @@ class myPlayer(PlayerInterface):
            print("Referee told me to play but the game is over!")
            return "PASS"

-        # Take more time in endgame
+        duration = 1.
        if self._board._nbBLACK + self._board._nbWHITE < 10:
-            duration = 3
-        elif self._board._nbBLACK + self._board._nbWHITE < 30:
            duration = 5
-        elif self._board._nbBLACK + self._board._nbWHITE < 40:
-            duration = 20
-        else:
-            duration = 30
-        # Play quickly if lack of time
+        elif self._board._nbBLACK + self._board._nbWHITE > 40:
+            duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
        duration = min(duration, (self.maxtime - self.time) / 10)

-        move = -1
-        b, w = self._board.compute_score()
-        # If passing wins the game, pass
-        if (
-            self.last_op_move == -1
-            and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0
-        ):
-            move = -1
-        # Play greedily opening moves early in the game
-        elif self._board._nbBLACK + self._board._nbWHITE < 10:
-            turn = self._board._nbBLACK + self._board._nbWHITE
-            for play in self.plays:
-                if (
-                    len(play["moves"]) > turn
-                    and Goban.Board.name_to_flat(play["moves"][turn])
-                    in self._board.legal_moves()
-                ):
-                    move = Goban.Board.name_to_flat(play["moves"][turn])
-        # Use iddfs alphabeta
+        # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
+        if self.last_op_move == "PASS" and self._board.diff_stones_board() * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
+            move = -1
+            score = math.inf
        else:
-            move = IDDFS(
-                self._board,
-                self.nnheuristic,
-                self._mycolor,
-                duration=duration,
-                max_depth=64,
-            )
-        nd = time.time()
-        self.time += nd - st
-        print(move, (nd - st), file=stderr)
+            move, score = IDDFS(
+                self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
+            )
        self._board.push(move)
+        print(move, score, file=stderr)
+        nd = time.time()
+        self.time += (nd - st)

+        # New here: allows to consider internal representations of moves
+        # move is an internal representation. To communicate with the interface I need to change if to a string
        return Goban.Board.flat_to_name(move)

    def playOpponentMove(self, move):
        print("Opponent played ", move) # New here
+        # the board needs an internal represetation to push the move. Not a string
        self._board.push(Goban.Board.name_to_flat(move))
-        self.last_op_move = Goban.Board.name_to_flat(move)
+        self.last_op_move = move

    def newGame(self, color):
-        self._board = Goban.Board()
        self._mycolor = color
-        self.last_op_move = -2
-        self.time = 0
+        self._opponent = Goban.Board.flip(color)

    def endGame(self, winner):
        if self._mycolor == winner:
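
Both versions of myPlayer.py above feed the network the same board encoding built in `goban2Go`: three 8x8 planes (black stones, white stones, and a constant plane marking whether Black is to move); they differ only in where the batch dimension is added. A minimal sketch of that encoding, with a plain Python list standing in for `Goban.Board.get_board()` (the constants and the `encode` helper below are illustrative, not the project's code):

```python
import torch

# Hypothetical flat 8x8 board: 0 = empty, 1 = black, 2 = white,
# standing in for Goban.Board.get_board().
EMPTY, BLACK, WHITE = 0, 1, 2


def encode(flat, black_to_play):
    planes = torch.zeros((3, 8, 8), dtype=torch.float32)
    for i in range(8):
        for j in range(8):
            stone = flat[i * 8 + j]
            if stone == BLACK:
                planes[0, i, j] = 1
            elif stone == WHITE:
                planes[1, i, j] = 1
    planes[2, :, :] = 1 if black_to_play else 0  # side-to-move plane
    return planes.unsqueeze(0)  # add a batch dimension: [1, 3, 8, 8]


board = [EMPTY] * 64
board[3 * 8 + 3] = BLACK
board[4 * 8 + 4] = WHITE
x = encode(board, black_to_play=True)
print(x.shape)  # torch.Size([1, 3, 8, 8])
```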

File diff suppressed because one or more lines are too long