Compare commits

..

10 Commits

Author SHA1 Message Date
Nemo D'ACREMONT
fb8250ff04
feat: fix indent 2025-05-19 19:58:10 +02:00
Nemo D'ACREMONT
c4358b19e3 fix crash 2025-05-18 22:05:26 +02:00
Nemo D'ACREMONT
b83e65a52f
i lied 2025-05-18 20:06:07 +02:00
Nemo D'ACREMONT
22dfbcbbc1
rendu 2025-05-18 20:02:38 +02:00
Nemo D'ACREMONT
91cf372a7e
formatted 2025-05-18 19:51:57 +02:00
Nemo D'ACREMONT
0e4d30fa0d final 2025-05-18 19:50:41 +02:00
Nemo D'ACREMONT
15a834488b fix pass 2025-05-18 14:53:23 +02:00
Nemo D'ACREMONT
3b2fe7671d feat: add opeinign 2025-05-18 14:38:14 +02:00
Nemo D'ACREMONT
18b927a213 aa 2025-05-18 14:24:43 +02:00
Nemo D'ACREMONT
fc060c7da1
feat: add things :) 2025-05-18 14:01:03 +02:00
7 changed files with 227 additions and 157 deletions

View File

@ -4,3 +4,5 @@ __pycache__
/go-package.tgz /go-package.tgz
*.ipynb *.ipynb
/chess /chess
/tp_player-ndacremont_meyben
/tp_player-ndacremont_meyben.tar.gz

27
go_player/Makefile Normal file
View File

@ -0,0 +1,27 @@
# Builds the submission archive tp_player-ndacremont_meyben.tar.gz.
# Each listed file is first staged as a copy under tp_player-ndacremont_meyben/
# by the pattern rule below, then the staged copies are archived together.
files += tp_player-ndacremont_meyben/README.md
files += tp_player-ndacremont_meyben/scrum.pt
files += tp_player-ndacremont_meyben/plays-8x8.json
files += tp_player-ndacremont_meyben/localGame.py
files += tp_player-ndacremont_meyben/namedGame.py
files += tp_player-ndacremont_meyben/Goban.py
files += tp_player-ndacremont_meyben/myPlayer.py
files += tp_player-ndacremont_meyben/moveSearch.py
files += tp_player-ndacremont_meyben/playerInterface.py
files += tp_player-ndacremont_meyben/requirements.txt

# Phony target names are accumulated in the PHONY variable and declared once
# at the bottom. (Writing `.PHONY += x` would assign to a variable literally
# named ".PHONY", which `$(PHONY)` does not read.)
PHONY += all
all: tp_player-ndacremont_meyben.tar.gz

# Archive every staged file.
tp_player-ndacremont_meyben.tar.gz: $(files)
	tar -cvzf $@ $^

# Stage a source file by copying it into the package directory.
tp_player-ndacremont_meyben/%: %
	@mkdir -p $(dir $@)
	cp $^ $@

PHONY += clean
clean:
	$(RM) -r tp_player-ndacremont_meyben tp_player-ndacremont_meyben.tar.gz

.PHONY: $(PHONY)

31
go_player/README.md Normal file
View File

@ -0,0 +1,31 @@
# TP Noté joueur Go -- Nemo D'ACREMONT, Martin EYBEN, G1
## Fichiers nécessaires pour lancer le joueur
Les fichiers suivants sont nécessaires pour lancer le joueur :
* myPlayer.py
* moveSearch.py
* playerInterface.py
* scrum.pt
* plays-8x8.json
## Librairies nécessaires
Ces librairies sont listées dans le fichier `requirements.txt` et sont les
suivantes :
* PyTorch
* Numpy
## Techniques utilisées
* IDDFS avec alphabeta
* Arrête le calcul du coup pendant le parcours alphabeta si on dépasse le temps
alloué
* Joue des coups classiques sans heuristique lorsqu'il y a peu (<10) de pions
sur le plateau
* Passe si l'adversaire vient de passer et qu'on est en train de
gagner
* Plus il y a de pions joués, plus on alloue de temps à la réflexion, sauf si
on approche des 30 min, auquel cas on joue rapidement

View File

@ -2,7 +2,6 @@
import Goban import Goban
import myPlayer import myPlayer
import gnugoPlayer
import time import time
from io import StringIO from io import StringIO
import sys import sys
@ -14,7 +13,7 @@ player1 = myPlayer.myPlayer()
player1.newGame(Goban.Board._BLACK) player1.newGame(Goban.Board._BLACK)
players.append(player1) players.append(player1)
player2 = gnugoPlayer.myPlayer() player2 = myPlayer.myPlayer()
player2.newGame(Goban.Board._WHITE) player2.newGame(Goban.Board._WHITE)
players.append(player2) players.append(player2)

View File

@ -5,83 +5,74 @@ from typing import Any, Callable
import Goban import Goban
# Returns heuristic, move
def _alphabeta( def _alphabeta(
board: Goban.Board, board: Goban.Board,
heuristic: Callable[[Goban.Board, Any], float], heuristic: Callable[[Goban.Board, Any], float],
color, color,
move,
alpha=-math.inf, alpha=-math.inf,
beta=math.inf, beta=math.inf,
depth: int = 3, depth: int = 3,
shouldStop = lambda: False shouldStop=lambda: False,
) -> tuple[float, Any]: ) -> tuple[float, Any]:
if board.is_game_over() or depth == 0:
return heuristic(board, color), None
wantMax = (board.next_player == color) wantMax = (board.next_player() == color)
if depth == 0 or board.is_game_over(): best_move = -1
return heuristic(board, board.next_player()), move
if wantMax: if wantMax:
acc = -math.inf, None acc = -math.inf
for move in board.generate_legal_moves(): for move in board.generate_legal_moves():
if Goban.Board.flat_to_name(move) == "PASS": if Goban.Board.flat_to_name(move) == "PASS":
continue continue
board.push(move) board.push(move)
value = ( value = _alphabeta(
_alphabeta( board,
board, heuristic=heuristic,
alpha=alpha, color=color,
beta=beta, alpha=alpha,
move=move, beta=beta,
heuristic=heuristic, depth=depth - 1,
color=color, shouldStop=shouldStop,
depth=depth - 1, )[0]
)[0],
move,
)
acc = max(
acc,
value,
key=lambda t: t[0],
)
board.pop() board.pop()
if shouldStop() or acc[0] >= beta: if value > acc:
acc = value
best_move = move
alpha = max(alpha, acc)
if shouldStop() or acc >= beta:
break # beta cutoff break # beta cutoff
alpha = max(alpha, acc[0])
else: else:
acc = math.inf, None acc = math.inf
for move in board.generate_legal_moves(): for move in board.generate_legal_moves():
if Goban.Board.flat_to_name(move) == "PASS": if Goban.Board.flat_to_name(move) == "PASS":
continue continue
board.push(move) board.push(move)
value = ( value = _alphabeta(
_alphabeta( board,
board, heuristic=heuristic,
alpha=alpha, color=color,
beta=beta, alpha=alpha,
move=move, beta=beta,
heuristic=heuristic, depth=depth - 1,
color=color, shouldStop=shouldStop,
depth=depth - 1, )[0]
)[0],
move,
)
acc = min(
acc,
value,
key=lambda t: t[0],
)
board.pop() board.pop()
if shouldStop() or acc[0] <= alpha: if value < acc:
break # alpha cutoff acc = value
beta = min(beta, acc[0]) best_move = move
return acc beta = min(beta, acc)
if shouldStop() or acc <= alpha:
break # alpha cutoff
return acc, best_move
def alphabeta( def alphabeta(
@ -90,36 +81,30 @@ def alphabeta(
color, color,
depth: int = 3, depth: int = 3,
): ):
_, move = _alphabeta(board, move=-1, heuristic=heuristic, color=color, depth=depth) _, move = _alphabeta(board, heuristic=heuristic, color=color, depth=depth)
return move return move
def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42): def IDDFS(
st = time.time() board: Goban.Board,
shouldStop = (lambda: time.time() - st > duration) heuristic: Callable[[Goban.Board, Any], float],
depth = 0 color,
move = -1 max_depth: int = 10,
score = -1 duration: float = 5.0, # Duration in seconds
):
best_move = -1
start_time = time.time()
shouldStop = lambda: (time.time() - start_time) >= duration
while not shouldStop() and depth <= maxdepth: for depth in range(1, max_depth + 1):
if depth % 2 == 0: value, move = _alphabeta(
score, move = _alphabeta( board, heuristic=heuristic, color=color, depth=depth, shouldStop=shouldStop
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop )
)
if score == math.inf: if shouldStop():
return move, score break
else: print(f"{depth}, {value}", file=stderr)
score, move = _alphabeta( best_move = move
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
)
if score == -math.inf: return best_move
return move, score
print("depth:", depth, time.time() - st, score, file=stderr)
depth += 1
print(time.time() - st, duration, depth, file=stderr)
return move, score

View File

@ -9,14 +9,14 @@ from sys import stderr
import time import time
import math import math
import Goban import Goban
from random import choice from moveSearch import IDDFS
from moveSearch import IDDFS, alphabeta
from playerInterface import * from playerInterface import *
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
import numpy as np import numpy as np
from torch.utils.data import Dataset import json
def setup_device(): def setup_device():
# Allows to use the GPU if available # Allows to use the GPU if available
@ -31,8 +31,11 @@ def setup_device():
def goban2Go(board: Goban.Board): def goban2Go(board: Goban.Board):
"""
Convert a goban board to a tensor for the model
"""
goBoard = torch.zeros((3, 8, 8), dtype=torch.float32) goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
black_plays = (board.next_player() == Goban.Board._BLACK) black_plays = board.next_player() == Goban.Board._BLACK
flat = board.get_board() flat = board.get_board()
for i in range(8): for i in range(8):
@ -42,9 +45,10 @@ def goban2Go(board: Goban.Board):
elif flat[i * 8 + j] == Goban.Board._WHITE: elif flat[i * 8 + j] == Goban.Board._WHITE:
goBoard[1, i, j] = 1 goBoard[1, i, j] = 1
goBoard[2,:,:] = 1 if black_plays else 0 goBoard[2, :, :] = 1 if black_plays else 0
return goBoard # sometime, a little bit of magic is required
return torch.from_numpy(np.array([goBoard])).float()
class GoModel(nn.Module): class GoModel(nn.Module):
@ -52,50 +56,44 @@ class GoModel(nn.Module):
super(GoModel, self).__init__() super(GoModel, self).__init__()
self.net = torch.nn.Sequential( self.net = torch.nn.Sequential(
nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False), nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(16), nn.BatchNorm2d(16),
torch.nn.ReLU(), torch.nn.ReLU(),
nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False),
nn.Conv2d(16, 32, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(32),
nn.BatchNorm2d(32), torch.nn.ReLU(),
torch.nn.ReLU(), nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(64),
nn.Conv2d(32, 64, kernel_size=3, padding=1, bias=False), nn.Dropout(0.4),
nn.BatchNorm2d(64), torch.nn.ReLU(),
nn.Dropout(0.4), nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False),
torch.nn.ReLU(), nn.BatchNorm2d(128),
torch.nn.ReLU(),
nn.Conv2d(64, 128, kernel_size=3, padding=1, bias=False), nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
nn.BatchNorm2d(128), nn.BatchNorm2d(128),
torch.nn.ReLU(), torch.nn.ReLU(),
nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False),
nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(128),
nn.BatchNorm2d(128), torch.nn.ReLU(),
torch.nn.ReLU(), nn.Flatten(),
nn.Linear(128 * 8 * 8, 128),
nn.Conv2d(128, 128, kernel_size=3, padding=1, bias=False), nn.BatchNorm1d(128),
nn.BatchNorm2d(128), torch.nn.ReLU(),
torch.nn.ReLU(), nn.Dropout(0.4),
nn.Linear(128, 1),
nn.Flatten(), nn.Sigmoid(),
nn.Linear(128 * 8 * 8, 128),
nn.BatchNorm1d(128),
torch.nn.ReLU(),
nn.Dropout(0.4),
nn.Linear(128, 1),
nn.Sigmoid()
) )
def forward(self, x): def forward(self, x):
if self.training: if self.training:
return self.net(x) return self.net(x)
else: else:
y = self.net(x) y = self.net(x)
batch_size = x.size(0) batch_size = x.size(0)
x_rotated = torch.stack([torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1) # x_rotated: [batch_size, 4, 3, 8, 8] x_rotated = torch.stack(
[torch.rot90(x, k=k, dims=[2, 3]) for k in range(4)], dim=1
) # x_rotated: [batch_size, 4, 3, 8, 8]
x_rotated = x_rotated.view(-1, 3, 8, 8) # [batch_size*4, 3, 8, 8] x_rotated = x_rotated.view(-1, 3, 8, 8) # [batch_size*4, 3, 8, 8]
with torch.no_grad(): with torch.no_grad():
@ -103,7 +101,7 @@ class GoModel(nn.Module):
# Reshape to get them by rotation # Reshape to get them by rotation
y_rotated = y_rotated.view(batch_size, 4, -1) # [batch_size, 4, 1] y_rotated = y_rotated.view(batch_size, 4, -1) # [batch_size, 4, 1]
y_mean = y_rotated.mean(dim=1) # [batch_size, 1] y_mean = y_rotated.mean(dim=1) # [batch_size, 1]
return y_mean return y_mean
@ -118,38 +116,44 @@ class myPlayer(PlayerInterface):
def __init__(self): def __init__(self):
self._board = Goban.Board() self._board = Goban.Board()
self._mycolor = None self._mycolor = None
self.last_op_move = -2
self.device = setup_device() self.device = setup_device()
print(self.device)
self.model = GoModel().to(self.device) self.model = GoModel().to(self.device)
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device) checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
self.model.load_state_dict(checkpoint["model_state_dict"]) self.model.load_state_dict(checkpoint["model_state_dict"])
self.last_op_move = None
self.maxtime = 1800 self.maxtime = 1800
self.time = 0 self.time = 0
# Load plays for the opening
self.plays = []
with open("plays-8x8.json") as f:
plays = json.load(f)
# Only keep the plays we want
l = "W" if self._mycolor == Goban.Board._WHITE else "B"
filtered = filter(lambda t: l in t["result"], plays)
# We sort to take the most advantageous openings
lp = l + "+"
for el in filtered:
el["result"] = float(el["result"].replace(lp, ""))
self.plays.append(el)
self.plays.sort(key=lambda t: t["result"])
def getPlayerName(self): def getPlayerName(self):
return "xXx_7h3_5cRuM_M45T3r_xXx" return "xXx_7h3_5cRuM_M45T3r_xXx"
@staticmethod
def simple_heuristic(board, color):
# Simple stone difference heuristic
score = board.compute_score()
return (
score[0] - score[1] if color == Goban.Board._BLACK else score[1] - score[0]
)
def nnheuristic(self, board: Goban.Board, color): def nnheuristic(self, board: Goban.Board, color):
if board.is_game_over(): if board.is_game_over():
if board.winner() == board._EMPTY: if board.winner() == board._EMPTY:
return 0.5 return 0.5
return math.inf if board.winner() == color else -math.inf return math.inf if board.winner() == self._mycolor else -math.inf
go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device) go_board = goban2Go(board).to(self.device)
self.model.eval() self.model.eval()
with torch.no_grad(): with torch.no_grad():
@ -166,47 +170,68 @@ class myPlayer(PlayerInterface):
print("Referee told me to play but the game is over!") print("Referee told me to play but the game is over!")
return "PASS" return "PASS"
duration = 1. # Take more time in endgame
if self._board._nbBLACK + self._board._nbWHITE < 10: if self._board._nbBLACK + self._board._nbWHITE < 10:
duration = 3
elif self._board._nbBLACK + self._board._nbWHITE < 30:
duration = 5 duration = 5
elif self._board._nbBLACK + self._board._nbWHITE > 40: elif self._board._nbBLACK + self._board._nbWHITE < 40:
duration = 64 - (self._board._nbBLACK + self._board._nbWHITE) duration = 20
duration = min(duration, (self.maxtime - self.time) / 10)
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
if self.last_op_move == "PASS" and self._board.diff_stones_board() * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
move = -1
score = math.inf
else: else:
move, score = IDDFS( duration = 30
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
# Play quickly if lack of time
duration = min(duration, (self.maxtime - self.time) / 10)
move = -1
b, w = self._board.compute_score()
# If passing wins the game, pass
if (
self.last_op_move == -1
and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0
):
move = -1
# Play greedily opening moves early in the game
elif self._board._nbBLACK + self._board._nbWHITE < 10:
turn = self._board._nbBLACK + self._board._nbWHITE
for play in self.plays:
if (
len(play["moves"]) > turn
and Goban.Board.name_to_flat(play["moves"][turn])
in self._board.legal_moves()
):
move = Goban.Board.name_to_flat(play["moves"][turn])
# Use iddfs alphabeta
else:
move = IDDFS(
self._board,
self.nnheuristic,
self._mycolor,
duration=duration,
max_depth=64,
) )
self._board.push(move)
print(move, score, file=stderr)
nd = time.time() nd = time.time()
self.time += (nd - st) self.time += nd - st
# New here: allows to consider internal representations of moves print(move, (nd - st), file=stderr)
# move is an internal representation. To communicate with the interface I need to change if to a string
self._board.push(move)
return Goban.Board.flat_to_name(move) return Goban.Board.flat_to_name(move)
def playOpponentMove(self, move): def playOpponentMove(self, move):
print("Opponent played ", move) # New here print("Opponent played ", move) # New here
# the board needs an internal represetation to push the move. Not a string
self._board.push(Goban.Board.name_to_flat(move)) self._board.push(Goban.Board.name_to_flat(move))
self.last_op_move = move self.last_op_move = Goban.Board.name_to_flat(move)
def newGame(self, color): def newGame(self, color):
self._board = Goban.Board()
self._mycolor = color self._mycolor = color
self._opponent = Goban.Board.flip(color) self.last_op_move = -2
self.time = 0
def endGame(self, winner): def endGame(self, winner):
if self._mycolor == winner: if self._mycolor == winner:

1
go_player/plays-8x8.json Normal file

File diff suppressed because one or more lines are too long