From 0e4d30fa0d9b84a8125166af17b4d763d0028297 Mon Sep 17 00:00:00 2001 From: Nemo D'ACREMONT Date: Sun, 18 May 2025 19:50:41 +0200 Subject: [PATCH] final --- go_player/Makefile | 29 +++++++++ go_player/README.md | 5 ++ go_player/localGame.py | 3 +- go_player/moveSearch.py | 136 ++++++++++++++++++---------------------- go_player/myPlayer.py | 65 +++++++++---------- 5 files changed, 130 insertions(+), 108 deletions(-) create mode 100644 go_player/Makefile create mode 100644 go_player/README.md diff --git a/go_player/Makefile b/go_player/Makefile new file mode 100644 index 0000000..c53be46 --- /dev/null +++ b/go_player/Makefile @@ -0,0 +1,29 @@ +files += tp_player-ndacremont_meyben/README.md +files += tp_player-ndacremont_meyben/scrum.pt +files += tp_player-ndacremont_meyben/plays-8x8.json +files += tp_player-ndacremont_meyben/localGame.py +files += tp_player-ndacremont_meyben/namedGame.py +files += tp_player-ndacremont_meyben/Goban.py +files += tp_player-ndacremont_meyben/myPlayer.py +files += tp_player-ndacremont_meyben/moveSearch.py +files += tp_player-ndacremont_meyben/playerInterface.py +files += tp_player-ndacremont_meyben/requirements.txt + +.PHONY += all +all: tp_player-ndacremont_meyben.tar.gz + +tp_player-ndacremont_meyben.tar.gz: tp_player-ndacremont_meyben $(files) + tar -cvzf $@ $^ + +tp_player-ndacremont_meyben/%: % + cp $^ $@ + +tp_player-ndacremont_meyben: + mkdir -p $@ + +.PHONY += clean +clean: + $(RM) -r tp_player-ndacremont_meyben tp_player-ndacremont_meyben.tar.gz + + +.PHONY: $(PHONY) diff --git a/go_player/README.md b/go_player/README.md new file mode 100644 index 0000000..0a47ca3 --- /dev/null +++ b/go_player/README.md @@ -0,0 +1,5 @@ +# TP Noté joueur Go -- Nemo D'ACREMONT, Martin EYBEN, G1 + + + + diff --git a/go_player/localGame.py b/go_player/localGame.py index 1669c29..c6c0802 100644 --- a/go_player/localGame.py +++ b/go_player/localGame.py @@ -2,7 +2,6 @@ import Goban import myPlayer -import gnugoPlayer import time from io import StringIO import sys @@ -14,7 +13,7 @@ player1 = myPlayer.myPlayer() player1.newGame(Goban.Board._BLACK) players.append(player1) -player2 = gnugoPlayer.myPlayer() +player2 = myPlayer.myPlayer() player2.newGame(Goban.Board._WHITE) players.append(player2) diff --git a/go_player/moveSearch.py b/go_player/moveSearch.py index f088c4c..d35bc5e 100644 --- a/go_player/moveSearch.py +++ b/go_player/moveSearch.py @@ -4,85 +4,74 @@ import math from typing import Any, Callable import Goban - -# Returns heuristic, move def _alphabeta( board: Goban.Board, heuristic: Callable[[Goban.Board, Any], float], color, - move, alpha=-math.inf, beta=math.inf, depth: int = 3, - shouldStop = lambda: False + shouldStop=lambda: False ) -> tuple[float, Any]: + if board.is_game_over() or depth == 0: + return heuristic(board, color), None - wantMax = (board.next_player == color) - if depth == 0 or board.is_game_over(): - return heuristic(board, board.next_player()), move + wantMax = (board.next_player() == color) + best_move = None if wantMax: - acc = -math.inf, None + acc = -math.inf for move in board.generate_legal_moves(): if Goban.Board.flat_to_name(move) == "PASS": continue board.push(move) - value = ( - _alphabeta( - board, - alpha=alpha, - beta=beta, - move=move, - heuristic=heuristic, - color=color, - depth=depth - 1, - )[0], - move, - ) - acc = max( - acc, - value, - key=lambda t: t[0], - ) + value = _alphabeta( + board, + heuristic=heuristic, + color=color, + alpha=alpha, + beta=beta, + depth=depth - 1, + shouldStop=shouldStop + )[0] board.pop() - if shouldStop() or acc[0] >= beta: + if value > acc: + acc = value + best_move = move + + alpha = max(alpha, acc) + if shouldStop() or acc >= beta: break # beta cutoff - alpha = max(alpha, acc[0]) else: - acc = math.inf, None + acc = math.inf for move in board.generate_legal_moves(): if Goban.Board.flat_to_name(move) == "PASS": continue board.push(move) - value = ( - _alphabeta( - board, - alpha=alpha, - beta=beta, - move=move, - heuristic=heuristic, - color=color, - depth=depth - 1, - )[0], - move, - ) - acc = min( - acc, - value, - key=lambda t: t[0], - ) + value = _alphabeta( + board, + heuristic=heuristic, + color=color, + alpha=alpha, + beta=beta, + depth=depth - 1, + shouldStop=shouldStop + )[0] board.pop() - if shouldStop() or acc[0] <= alpha: + if value < acc: + acc = value + best_move = move + + beta = min(beta, acc) + if shouldStop() or acc <= alpha: break # alpha cutoff - beta = min(beta, acc[0]) - - return acc + return acc, best_move def alphabeta( board: Goban.Board, @@ -90,36 +79,35 @@ def alphabeta( color, depth: int = 3, ): - _, move = _alphabeta(board, move=-1, heuristic=heuristic, color=color, depth=depth) + _, move = _alphabeta(board, heuristic=heuristic, color=color, depth=depth) return move -def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42): - st = time.time() - shouldStop = (lambda: time.time() - st > duration) - depth = 0 - move = -1 - score = -1 +def IDDFS( + board: Goban.Board, + heuristic: Callable[[Goban.Board, Any], float], + color, + max_depth: int = 10, + duration: float = 5.0 # Duration in seconds +): + best_move = None + start_time = time.time() + shouldStop = lambda: (time.time() - start_time) >= duration - while not shouldStop() and depth <= maxdepth: - if depth % 2 == 0: - score, move = _alphabeta( - board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop - ) + for depth in range(1, max_depth + 1): - if score == math.inf: - return move, score + value, move = _alphabeta( + board, + heuristic=heuristic, + color=color, + depth=depth, + shouldStop=shouldStop + ) - else: - score, move = _alphabeta( - board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop - ) + if shouldStop(): + break - if score == -math.inf: - return move, score + print(f"{depth}, {value}", file=stderr) + best_move = move - print("depth:", depth, time.time() - st, score, file=stderr) - depth += 1 - - print(time.time() - st, duration, depth, file=stderr) - return move, score + return best_move diff --git a/go_player/myPlayer.py b/go_player/myPlayer.py index a6a3209..5368da2 100644 --- a/go_player/myPlayer.py +++ b/go_player/myPlayer.py @@ -32,6 +32,9 @@ def setup_device(): def goban2Go(board: Goban.Board): + """ + Convert a goban board to a tensor for the model + """ goBoard = torch.zeros((3, 8, 8), dtype=torch.float32) black_plays = (board.next_player() == Goban.Board._BLACK) @@ -119,33 +122,32 @@ class myPlayer(PlayerInterface): def __init__(self): self._board = Goban.Board() self._mycolor = None + self.last_op_move = -2 self.device = setup_device() - print(self.device) - self.model = GoModel().to(self.device) - checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device) self.model.load_state_dict(checkpoint["model_state_dict"]) - self.last_op_move = None self.maxtime = 1800 self.time = 0 + # Load plays for the opening self.plays = [] with open("plays-8x8.json") as f: plays = json.load(f) + # Only keep the plays we want l = "W" if self._mycolor == Goban.Board._WHITE else "B" filtered = filter(lambda t: l in t["result"], plays) + # We sort to take the most advantageous openings lp = l + "+" for el in filtered: el["result"] = float(el["result"].replace(lp, "")) self.plays.append(el) self.plays.sort(key=lambda t: t["result"]) - self.turn = 0 def getPlayerName(self): return "xXx_7h3_5cRuM_M45T3r_xXx" @@ -155,7 +157,7 @@ class myPlayer(PlayerInterface): if board.winner() == board._EMPTY: return 0.5 - return math.inf if board.winner() == color else -math.inf + return math.inf if board.winner() == self._mycolor else -math.inf go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device) @@ -174,58 +176,57 @@ class myPlayer(PlayerInterface): print("Referee told me to play but the game is over!") return "PASS" - duration = 1. - + # Take more time in endgame if self._board._nbBLACK + self._board._nbWHITE < 10: - duration = 1 + duration = 5 - elif self._board._nbBLACK + self._board._nbWHITE < 30: - duration = 1 + elif self._board._nbBLACK + self._board._nbWHITE < 40: + duration = 20 - elif self._board._nbBLACK + self._board._nbWHITE > 40: - duration = 1 # 64 - (self._board._nbBLACK + self._board._nbWHITE) + else: + duration = 30 + # Play quickly if lack of time duration = min(duration, (self.maxtime - self.time) / 10) + move = -1 - score = 0 - b, w = self._board.compute_score() - # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1) - if self.last_op_move == "PASS" and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0: - move = -1 - score = math.inf - elif self._board._nbBLACK + self._board._nbWHITE < 20: + # If passing wins the game, pass + if self.last_op_move == -1 and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0: + move = -1 + + # Play greedily opening moves early in the game + elif self._board._nbBLACK + self._board._nbWHITE < 10: turn = self._board._nbBLACK + self._board._nbWHITE for play in self.plays: if len(play["moves"]) > turn and Goban.Board.name_to_flat(play["moves"][turn]) in self._board.legal_moves(): move = Goban.Board.name_to_flat(play["moves"][turn]) - score = 1 - elif move == -1: - move, score = IDDFS( - self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64 + # Use iddfs alphabeta + else: + move = IDDFS( + self._board, self.nnheuristic, self._mycolor, duration=duration, max_depth=64 ) - self._board.push(move) - print(move, score, file=stderr) nd = time.time() self.time += (nd - st) - self.turn += 1 - # New here: allows to consider internal representations of moves - # move is an internal representation. To communicate with the interface I need to change if to a string + print(move, (nd - st), file=stderr) + + self._board.push(move) return Goban.Board.flat_to_name(move) def playOpponentMove(self, move): print("Opponent played ", move) # New here - # the board needs an internal represetation to push the move. Not a string self._board.push(Goban.Board.name_to_flat(move)) - self.last_op_move = move - self.turn += 1 + self.last_op_move = Goban.Board.name_to_flat(move) def newGame(self, color): + self._board = Goban.Board() self._mycolor = color + self.last_op_move = -2 + self.time = 0 def endGame(self, winner): if self._mycolor == winner: