final
This commit is contained in:
parent
15a834488b
commit
0e4d30fa0d
29
go_player/Makefile
Normal file
29
go_player/Makefile
Normal file
@ -0,0 +1,29 @@
|
||||
files += tp_player-ndacremont_meyben/README.md
|
||||
files += tp_player-ndacremont_meyben/scrum.pt
|
||||
files += tp_player-ndacremont_meyben/plays-8x8.json
|
||||
files += tp_player-ndacremont_meyben/localGame.py
|
||||
files += tp_player-ndacremont_meyben/namedGame.py
|
||||
files += tp_player-ndacremont_meyben/Goban.py
|
||||
files += tp_player-ndacremont_meyben/myPlayer.py
|
||||
files += tp_player-ndacremont_meyben/moveSearch.py
|
||||
files += tp_player-ndacremont_meyben/playerInterface.py
|
||||
files += tp_player-ndacremont_meyben/requirements.txt
|
||||
|
||||
.PHONY += all
|
||||
all: tp_player-ndacremont_meyben.tar.gz
|
||||
|
||||
tp_player-ndacremont_meyben.tar.gz: tp_player-ndacremont_meyben $(files)
|
||||
tar -cvzf $@ $^
|
||||
|
||||
tp_player-ndacremont_meyben/%: %
|
||||
cp $^ $@
|
||||
|
||||
tp_player-ndacremont_meyben:
|
||||
mkdir -p $@
|
||||
|
||||
.PHONY += clean
|
||||
clean:
|
||||
$(RM) -r tp_player-ndacremont_meyben tp_player-ndacremont_meyben.tar.gz
|
||||
|
||||
|
||||
.PHONY: $(PHONY)
|
5
go_player/README.md
Normal file
5
go_player/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
# TP Noté joueur Go -- Nemo D'ACREMONT, Martin EYBEN, G1
|
||||
|
||||
|
||||
|
||||
|
@ -2,7 +2,6 @@
|
||||
|
||||
import Goban
|
||||
import myPlayer
|
||||
import gnugoPlayer
|
||||
import time
|
||||
from io import StringIO
|
||||
import sys
|
||||
@ -14,7 +13,7 @@ player1 = myPlayer.myPlayer()
|
||||
player1.newGame(Goban.Board._BLACK)
|
||||
players.append(player1)
|
||||
|
||||
player2 = gnugoPlayer.myPlayer()
|
||||
player2 = myPlayer.myPlayer()
|
||||
player2.newGame(Goban.Board._WHITE)
|
||||
players.append(player2)
|
||||
|
||||
|
@ -4,85 +4,74 @@ import math
|
||||
from typing import Any, Callable
|
||||
import Goban
|
||||
|
||||
|
||||
# Returns heuristic, move
|
||||
def _alphabeta(
|
||||
board: Goban.Board,
|
||||
heuristic: Callable[[Goban.Board, Any], float],
|
||||
color,
|
||||
move,
|
||||
alpha=-math.inf,
|
||||
beta=math.inf,
|
||||
depth: int = 3,
|
||||
shouldStop = lambda: False
|
||||
shouldStop=lambda: False
|
||||
) -> tuple[float, Any]:
|
||||
if board.is_game_over() or depth == 0:
|
||||
return heuristic(board, color), None
|
||||
|
||||
wantMax = (board.next_player == color)
|
||||
if depth == 0 or board.is_game_over():
|
||||
return heuristic(board, board.next_player()), move
|
||||
wantMax = (board.next_player() == color)
|
||||
best_move = None
|
||||
|
||||
if wantMax:
|
||||
acc = -math.inf, None
|
||||
acc = -math.inf
|
||||
for move in board.generate_legal_moves():
|
||||
if Goban.Board.flat_to_name(move) == "PASS":
|
||||
continue
|
||||
|
||||
board.push(move)
|
||||
value = (
|
||||
_alphabeta(
|
||||
value = _alphabeta(
|
||||
board,
|
||||
alpha=alpha,
|
||||
beta=beta,
|
||||
move=move,
|
||||
heuristic=heuristic,
|
||||
color=color,
|
||||
alpha=alpha,
|
||||
beta=beta,
|
||||
depth=depth - 1,
|
||||
)[0],
|
||||
move,
|
||||
)
|
||||
acc = max(
|
||||
acc,
|
||||
value,
|
||||
key=lambda t: t[0],
|
||||
)
|
||||
shouldStop=shouldStop
|
||||
)[0]
|
||||
board.pop()
|
||||
|
||||
if shouldStop() or acc[0] >= beta:
|
||||
if value > acc:
|
||||
acc = value
|
||||
best_move = move
|
||||
|
||||
alpha = max(alpha, acc)
|
||||
if shouldStop() or acc >= beta:
|
||||
break # beta cutoff
|
||||
alpha = max(alpha, acc[0])
|
||||
|
||||
else:
|
||||
acc = math.inf, None
|
||||
acc = math.inf
|
||||
for move in board.generate_legal_moves():
|
||||
if Goban.Board.flat_to_name(move) == "PASS":
|
||||
continue
|
||||
|
||||
board.push(move)
|
||||
value = (
|
||||
_alphabeta(
|
||||
value = _alphabeta(
|
||||
board,
|
||||
alpha=alpha,
|
||||
beta=beta,
|
||||
move=move,
|
||||
heuristic=heuristic,
|
||||
color=color,
|
||||
alpha=alpha,
|
||||
beta=beta,
|
||||
depth=depth - 1,
|
||||
)[0],
|
||||
move,
|
||||
)
|
||||
acc = min(
|
||||
acc,
|
||||
value,
|
||||
key=lambda t: t[0],
|
||||
)
|
||||
shouldStop=shouldStop
|
||||
)[0]
|
||||
board.pop()
|
||||
|
||||
if shouldStop() or acc[0] <= alpha:
|
||||
if value < acc:
|
||||
acc = value
|
||||
best_move = move
|
||||
|
||||
beta = min(beta, acc)
|
||||
if shouldStop() or acc <= alpha:
|
||||
break # alpha cutoff
|
||||
beta = min(beta, acc[0])
|
||||
|
||||
return acc
|
||||
|
||||
return acc, best_move
|
||||
|
||||
def alphabeta(
|
||||
board: Goban.Board,
|
||||
@ -90,36 +79,35 @@ def alphabeta(
|
||||
color,
|
||||
depth: int = 3,
|
||||
):
|
||||
_, move = _alphabeta(board, move=-1, heuristic=heuristic, color=color, depth=depth)
|
||||
_, move = _alphabeta(board, heuristic=heuristic, color=color, depth=depth)
|
||||
return move
|
||||
|
||||
|
||||
def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
|
||||
st = time.time()
|
||||
shouldStop = (lambda: time.time() - st > duration)
|
||||
depth = 0
|
||||
move = -1
|
||||
score = -1
|
||||
def IDDFS(
|
||||
board: Goban.Board,
|
||||
heuristic: Callable[[Goban.Board, Any], float],
|
||||
color,
|
||||
max_depth: int = 10,
|
||||
duration: float = 5.0 # Duration in seconds
|
||||
):
|
||||
best_move = None
|
||||
start_time = time.time()
|
||||
shouldStop = lambda: (time.time() - start_time) >= duration
|
||||
|
||||
while not shouldStop() and depth <= maxdepth:
|
||||
if depth % 2 == 0:
|
||||
score, move = _alphabeta(
|
||||
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
|
||||
for depth in range(1, max_depth + 1):
|
||||
|
||||
value, move = _alphabeta(
|
||||
board,
|
||||
heuristic=heuristic,
|
||||
color=color,
|
||||
depth=depth,
|
||||
shouldStop=shouldStop
|
||||
)
|
||||
|
||||
if score == math.inf:
|
||||
return move, score
|
||||
if shouldStop():
|
||||
break
|
||||
|
||||
else:
|
||||
score, move = _alphabeta(
|
||||
board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
|
||||
)
|
||||
print(f"{depth}, {value}", file=stderr)
|
||||
best_move = move
|
||||
|
||||
if score == -math.inf:
|
||||
return move, score
|
||||
|
||||
print("depth:", depth, time.time() - st, score, file=stderr)
|
||||
depth += 1
|
||||
|
||||
print(time.time() - st, duration, depth, file=stderr)
|
||||
return move, score
|
||||
return best_move
|
||||
|
@ -32,6 +32,9 @@ def setup_device():
|
||||
|
||||
|
||||
def goban2Go(board: Goban.Board):
|
||||
"""
|
||||
Convert a goban board to a tensor for the model
|
||||
"""
|
||||
goBoard = torch.zeros((3, 8, 8), dtype=torch.float32)
|
||||
black_plays = (board.next_player() == Goban.Board._BLACK)
|
||||
|
||||
@ -119,33 +122,32 @@ class myPlayer(PlayerInterface):
|
||||
def __init__(self):
|
||||
self._board = Goban.Board()
|
||||
self._mycolor = None
|
||||
self.last_op_move = -2
|
||||
|
||||
self.device = setup_device()
|
||||
print(self.device)
|
||||
|
||||
self.model = GoModel().to(self.device)
|
||||
|
||||
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
|
||||
self.model.load_state_dict(checkpoint["model_state_dict"])
|
||||
self.last_op_move = None
|
||||
|
||||
self.maxtime = 1800
|
||||
self.time = 0
|
||||
|
||||
# Load plays for the opening
|
||||
self.plays = []
|
||||
with open("plays-8x8.json") as f:
|
||||
plays = json.load(f)
|
||||
|
||||
# Only keep the plays we want
|
||||
l = "W" if self._mycolor == Goban.Board._WHITE else "B"
|
||||
filtered = filter(lambda t: l in t["result"], plays)
|
||||
|
||||
# We sort to take the most advantageous openings
|
||||
lp = l + "+"
|
||||
for el in filtered:
|
||||
el["result"] = float(el["result"].replace(lp, ""))
|
||||
self.plays.append(el)
|
||||
|
||||
self.plays.sort(key=lambda t: t["result"])
|
||||
self.turn = 0
|
||||
|
||||
def getPlayerName(self):
|
||||
return "xXx_7h3_5cRuM_M45T3r_xXx"
|
||||
@ -155,7 +157,7 @@ class myPlayer(PlayerInterface):
|
||||
if board.winner() == board._EMPTY:
|
||||
return 0.5
|
||||
|
||||
return math.inf if board.winner() == color else -math.inf
|
||||
return math.inf if board.winner() == self._mycolor else -math.inf
|
||||
|
||||
go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)
|
||||
|
||||
@ -174,58 +176,57 @@ class myPlayer(PlayerInterface):
|
||||
print("Referee told me to play but the game is over!")
|
||||
return "PASS"
|
||||
|
||||
duration = 1.
|
||||
|
||||
# Take more time in endgame
|
||||
if self._board._nbBLACK + self._board._nbWHITE < 10:
|
||||
duration = 1
|
||||
duration = 5
|
||||
|
||||
elif self._board._nbBLACK + self._board._nbWHITE < 30:
|
||||
duration = 1
|
||||
elif self._board._nbBLACK + self._board._nbWHITE < 40:
|
||||
duration = 20
|
||||
|
||||
elif self._board._nbBLACK + self._board._nbWHITE > 40:
|
||||
duration = 1 # 64 - (self._board._nbBLACK + self._board._nbWHITE)
|
||||
else:
|
||||
duration = 30
|
||||
|
||||
# Play quickly if lack of time
|
||||
duration = min(duration, (self.maxtime - self.time) / 10)
|
||||
move = -1
|
||||
score = 0
|
||||
|
||||
move = -1
|
||||
b, w = self._board.compute_score()
|
||||
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
|
||||
if self.last_op_move == "PASS" and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
|
||||
move = -1
|
||||
score = math.inf
|
||||
|
||||
elif self._board._nbBLACK + self._board._nbWHITE < 20:
|
||||
# If passing wins the game, pass
|
||||
if self.last_op_move == -1 and (b - w) * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
|
||||
move = -1
|
||||
|
||||
# Play greedily opening moves early in the game
|
||||
elif self._board._nbBLACK + self._board._nbWHITE < 10:
|
||||
turn = self._board._nbBLACK + self._board._nbWHITE
|
||||
for play in self.plays:
|
||||
if len(play["moves"]) > turn and Goban.Board.name_to_flat(play["moves"][turn]) in self._board.legal_moves():
|
||||
move = Goban.Board.name_to_flat(play["moves"][turn])
|
||||
score = 1
|
||||
|
||||
elif move == -1:
|
||||
move, score = IDDFS(
|
||||
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
|
||||
# Use iddfs alphabeta
|
||||
else:
|
||||
move = IDDFS(
|
||||
self._board, self.nnheuristic, self._mycolor, duration=duration, max_depth=64
|
||||
)
|
||||
|
||||
self._board.push(move)
|
||||
print(move, score, file=stderr)
|
||||
nd = time.time()
|
||||
self.time += (nd - st)
|
||||
self.turn += 1
|
||||
|
||||
# New here: allows to consider internal representations of moves
|
||||
# move is an internal representation. To communicate with the interface I need to change if to a string
|
||||
print(move, (nd - st), file=stderr)
|
||||
|
||||
self._board.push(move)
|
||||
return Goban.Board.flat_to_name(move)
|
||||
|
||||
def playOpponentMove(self, move):
|
||||
print("Opponent played ", move) # New here
|
||||
# the board needs an internal represetation to push the move. Not a string
|
||||
self._board.push(Goban.Board.name_to_flat(move))
|
||||
self.last_op_move = move
|
||||
self.turn += 1
|
||||
self.last_op_move = Goban.Board.name_to_flat(move)
|
||||
|
||||
def newGame(self, color):
|
||||
self._board = Goban.Board()
|
||||
self._mycolor = color
|
||||
self.last_op_move = -2
|
||||
self.time = 0
|
||||
|
||||
def endGame(self, winner):
|
||||
if self._mycolor == winner:
|
||||
|
Loading…
x
Reference in New Issue
Block a user