This commit is contained in:
Nemo D'ACREMONT 2025-05-18 19:50:41 +02:00
parent 15a834488b
commit 0e4d30fa0d
5 changed files with 130 additions and 108 deletions

29
go_player/Makefile Normal file
View File

@ -0,0 +1,29 @@
# Packaging Makefile: copies the deliverable files into a staging directory
# and bundles them into a gzip-compressed tarball.
#
#   make / make all  -> build $(dist).tar.gz
#   make clean       -> remove the staging directory and the tarball

# Name of the staging directory; the archive is named $(dist).tar.gz.
dist := tp_player-ndacremont_meyben

# Files shipped in the archive. Each one is copied from the file of the same
# name in the current directory by the pattern rule below.
files += $(dist)/README.md
files += $(dist)/scrum.pt
files += $(dist)/plays-8x8.json
files += $(dist)/localGame.py
files += $(dist)/namedGame.py
files += $(dist)/Goban.py
files += $(dist)/myPlayer.py
files += $(dist)/moveSearch.py
files += $(dist)/playerInterface.py
files += $(dist)/requirements.txt

# Phony targets are collected in the PHONY variable and declared once at the
# end of the file. (Appending to a variable named ".PHONY" and then expanding
# $(PHONY) would declare nothing, because those are two different variables.)
PHONY += all
all: $(dist).tar.gz

# The staging directory is an order-only prerequisite (after the "|"): it must
# exist, but its timestamp does not trigger a rebuild and it is not part of
# the file list handed to tar, so every file is archived exactly once.
$(dist).tar.gz: $(files) | $(dist)
	tar -cvzf $@ $(files)

# Copy one source file into the staging directory. The order-only dependency
# guarantees the directory exists before cp runs, including under "make -j".
$(dist)/%: % | $(dist)
	cp $< $@

$(dist):
	mkdir -p $@

PHONY += clean
clean:
	$(RM) -r $(dist) $(dist).tar.gz

.PHONY: $(PHONY)

5
go_player/README.md Normal file
View File

@ -0,0 +1,5 @@
# TP Noté joueur Go -- Nemo D'ACREMONT, Martin EYBEN, G1

View File

@ -2,7 +2,6 @@
import Goban import Goban
import myPlayer import myPlayer
import gnugoPlayer
import time import time
from io import StringIO from io import StringIO
import sys import sys
@ -14,7 +13,7 @@ player1 = myPlayer.myPlayer()
player1.newGame(Goban.Board._BLACK) player1.newGame(Goban.Board._BLACK)
players.append(player1) players.append(player1)
player2 = gnugoPlayer.myPlayer() player2 = myPlayer.myPlayer()
player2.newGame(Goban.Board._WHITE) player2.newGame(Goban.Board._WHITE)
players.append(player2) players.append(player2)

View File

@ -4,85 +4,74 @@ import math
from typing import Any, Callable from typing import Any, Callable
import Goban import Goban
# Returns heuristic, move
def _alphabeta( def _alphabeta(
board: Goban.Board, board: Goban.Board,
heuristic: Callable[[Goban.Board, Any], float], heuristic: Callable[[Goban.Board, Any], float],
color, color,
move,
alpha=-math.inf, alpha=-math.inf,
beta=math.inf, beta=math.inf,
depth: int = 3, depth: int = 3,
shouldStop = lambda: False shouldStop=lambda: False
) -> tuple[float, Any]: ) -> tuple[float, Any]:
if board.is_game_over() or depth == 0:
return heuristic(board, color), None
wantMax = (board.next_player == color) wantMax = (board.next_player() == color)
if depth == 0 or board.is_game_over(): best_move = None
return heuristic(board, board.next_player()), move
if wantMax: if wantMax:
acc = -math.inf, None acc = -math.inf
for move in board.generate_legal_moves(): for move in board.generate_legal_moves():
if Goban.Board.flat_to_name(move) == "PASS": if Goban.Board.flat_to_name(move) == "PASS":
continue continue
board.push(move) board.push(move)
value = ( value = _alphabeta(
_alphabeta( board,
board, heuristic=heuristic,
alpha=alpha, color=color,
beta=beta, alpha=alpha,
move=move, beta=beta,
heuristic=heuristic, depth=depth - 1,
color=color, shouldStop=shouldStop
depth=depth - 1, )[0]
)[0],
move,
)
acc = max(
acc,
value,
key=lambda t: t[0],
)
board.pop() board.pop()
if shouldStop() or acc[0] >= beta: if value > acc:
acc = value
best_move = move
alpha = max(alpha, acc)
if shouldStop() or acc >= beta:
break # beta cutoff break # beta cutoff
alpha = max(alpha, acc[0])
else: else:
acc = math.inf, None acc = math.inf
for move in board.generate_legal_moves(): for move in board.generate_legal_moves():
if Goban.Board.flat_to_name(move) == "PASS": if Goban.Board.flat_to_name(move) == "PASS":
continue continue
board.push(move) board.push(move)
value = ( value = _alphabeta(
_alphabeta( board,
board, heuristic=heuristic,
alpha=alpha, color=color,
beta=beta, alpha=alpha,
move=move, beta=beta,
heuristic=heuristic, depth=depth - 1,
color=color, shouldStop=shouldStop
depth=depth - 1, )[0]
)[0],
move,
)
acc = min(
acc,
value,
key=lambda t: t[0],
)
board.pop() board.pop()
if shouldStop() or acc[0] <= alpha: if value < acc:
acc = value
best_move = move
beta = min(beta, acc)
if shouldStop() or acc <= alpha:
break # alpha cutoff break # alpha cutoff
beta = min(beta, acc[0])
return acc
return acc, best_move
def alphabeta( def alphabeta(
board: Goban.Board, board: Goban.Board,
@ -90,36 +79,35 @@ def alphabeta(
color, color,
depth: int = 3, depth: int = 3,
): ):
_, move = _alphabeta(board, move=-1, heuristic=heuristic, color=color, depth=depth) _, move = _alphabeta(board, heuristic=heuristic, color=color, depth=depth)
return move return move
def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42): def IDDFS(
st = time.time() board: Goban.Board,
shouldStop = (lambda: time.time() - st > duration) heuristic: Callable[[Goban.Board, Any], float],
depth = 0 color,
move = -1 max_depth: int = 10,
score = -1 duration: float = 5.0 # Duration in seconds
):
best_move = None
start_time = time.time()
shouldStop = lambda: (time.time() - start_time) >= duration
while not shouldStop() and depth <= maxdepth: for depth in range(1, max_depth + 1):
if depth % 2 == 0:
score, move = _alphabeta(
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
)
if score == math.inf: value, move = _alphabeta(
return move, score board,
heuristic=heuristic,
color=color,
depth=depth,
shouldStop=shouldStop
)
else: if shouldStop():
score, move = _alphabeta( break
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
)
if score == -math.inf: print(f"{depth}, {value}", file=stderr)
return move, score best_move = move
print("depth:", depth, time.time() - st, score, file=stderr) return best_move
depth += 1
print(time.time() - st, duration, depth, file=stderr)
return move, score

View File

@ -32,6 +32,9 @@ def setup_device():
def goban2Go(board: Goban.Board): def goban2Go(board: Goban.Board):
"""
Convert a goban board to a tensor for the model
"""
goBoard = torch.zeros((3, 8, 8), dtype=torch.float32) goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
black_plays = (board.next_player() == Goban.Board._BLACK) black_plays = (board.next_player() == Goban.Board._BLACK)
@ -119,33 +122,32 @@ class myPlayer(PlayerInterface):
def __init__(self): def __init__(self):
self._board = Goban.Board() self._board = Goban.Board()
self._mycolor = None self._mycolor = None
self.last_op_move = -2
self.device = setup_device() self.device = setup_device()
print(self.device)
self.model = GoModel().to(self.device) self.model = GoModel().to(self.device)
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device) checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
self.model.load_state_dict(checkpoint["model_state_dict"]) self.model.load_state_dict(checkpoint["model_state_dict"])
self.last_op_move = None
self.maxtime = 1800 self.maxtime = 1800
self.time = 0 self.time = 0
# Load plays for the opening
self.plays = [] self.plays = []
with open("plays-8x8.json") as f: with open("plays-8x8.json") as f:
plays = json.load(f) plays = json.load(f)
# Only keep the plays we want
l = "W" if self._mycolor == Goban.Board._WHITE else "B" l = "W" if self._mycolor == Goban.Board._WHITE else "B"
filtered = filter(lambda t: l in t["result"], plays) filtered = filter(lambda t: l in t["result"], plays)
# We sort to take the most advantageous openings
lp = l + "+" lp = l + "+"
for el in filtered: for el in filtered:
el["result"] = float(el["result"].replace(lp, "")) el["result"] = float(el["result"].replace(lp, ""))
self.plays.append(el) self.plays.append(el)
self.plays.sort(key=lambda t: t["result"]) self.plays.sort(key=lambda t: t["result"])
self.turn = 0
def getPlayerName(self): def getPlayerName(self):
return "xXx_7h3_5cRuM_M45T3r_xXx" return "xXx_7h3_5cRuM_M45T3r_xXx"
@ -155,7 +157,7 @@ class myPlayer(PlayerInterface):
if board.winner() == board._EMPTY: if board.winner() == board._EMPTY:
return 0.5 return 0.5
return math.inf if board.winner() == color else -math.inf return math.inf if board.winner() == self._mycolor else -math.inf
go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device) go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
@ -174,58 +176,57 @@ class myPlayer(PlayerInterface):
print("Referee told me to play but the game is over!") print("Referee told me to play but the game is over!")
return "PASS" return "PASS"
duration = 1. # Take more time in endgame
if self._board._nbBLACK + self._board._nbWHITE < 10: if self._board._nbBLACK + self._board._nbWHITE < 10:
duration = 1 duration = 5
elif self._board._nbBLACK + self._board._nbWHITE < 30: elif self._board._nbBLACK + self._board._nbWHITE < 40:
duration = 1 duration = 20
elif self._board._nbBLACK + self._board._nbWHITE > 40: else:
duration = 1 # 64 - (self._board._nbBLACK + self._board._nbWHITE) duration = 30
# Play quickly if lack of time
duration = min(duration, (self.maxtime - self.time) / 10) duration = min(duration, (self.maxtime - self.time) / 10)
move = -1 move = -1
score = 0
b, w = self._board.compute_score() b, w = self._board.compute_score()
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
if self.last_op_move == "PASS" and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
move = -1
score = math.inf
elif self._board._nbBLACK + self._board._nbWHITE < 20: # If passing wins the game, pass
if self.last_op_move == -1 and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
move = -1
# Play greedily opening moves early in the game
elif self._board._nbBLACK + self._board._nbWHITE < 10:
turn = self._board._nbBLACK + self._board._nbWHITE turn = self._board._nbBLACK + self._board._nbWHITE
for play in self.plays: for play in self.plays:
if len(play["moves"]) > turn and Goban.Board.name_to_flat(play["moves"][turn]) in self._board.legal_moves(): if len(play["moves"]) > turn and Goban.Board.name_to_flat(play["moves"][turn]) in self._board.legal_moves():
move = Goban.Board.name_to_flat(play["moves"][turn]) move = Goban.Board.name_to_flat(play["moves"][turn])
score = 1
elif move == -1: # Use iddfs alphabeta
move, score = IDDFS( else:
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64 move = IDDFS(
self._board, self.nnheuristic, self._mycolor, duration=duration, max_depth=64
) )
self._board.push(move)
print(move, score, file=stderr)
nd = time.time() nd = time.time()
self.time += (nd - st) self.time += (nd - st)
self.turn += 1
# New here: allows to consider internal representations of moves print(move, (nd - st), file=stderr)
# move is an internal representation. To communicate with the interface I need to change if to a string
self._board.push(move)
return Goban.Board.flat_to_name(move) return Goban.Board.flat_to_name(move)
def playOpponentMove(self, move): def playOpponentMove(self, move):
print("Opponent played ", move) # New here print("Opponent played ", move) # New here
# the board needs an internal represetation to push the move. Not a string
self._board.push(Goban.Board.name_to_flat(move)) self._board.push(Goban.Board.name_to_flat(move))
self.last_op_move = move self.last_op_move = Goban.Board.name_to_flat(move)
self.turn += 1
def newGame(self, color): def newGame(self, color):
self._board = Goban.Board()
self._mycolor = color self._mycolor = color
self.last_op_move = -2
self.time = 0
def endGame(self, winner): def endGame(self, winner):
if self._mycolor == winner: if self._mycolor == winner: