This commit is contained in:
Nemo D'ACREMONT 2025-05-18 14:00:25 +02:00
parent b657036355
commit 96b3815280
2 changed files with 27 additions and 64 deletions

View File

@ -5,9 +5,6 @@ from typing import Any, Callable
import Goban import Goban
def _next_color(color):
    """Return the colour opposite to *color*.

    Yields ``Goban.Board._BLACK`` when *color* equals
    ``Goban.Board._WHITE``; any other value yields ``Goban.Board._WHITE``.
    """
    if color == Goban.Board._WHITE:
        return Goban.Board._BLACK
    return Goban.Board._WHITE
# Returns heuristic, move # Returns heuristic, move
def _alphabeta( def _alphabeta(
board: Goban.Board, board: Goban.Board,
@ -22,7 +19,7 @@ def _alphabeta(
wantMax = (board.next_player == color) wantMax = (board.next_player == color)
if depth == 0 or board.is_game_over(): if depth == 0 or board.is_game_over():
return heuristic(board, color), move return heuristic(board, board.next_player()), move
if wantMax: if wantMax:
acc = -math.inf, None acc = -math.inf, None
@ -38,7 +35,7 @@ def _alphabeta(
beta=beta, beta=beta,
move=move, move=move,
heuristic=heuristic, heuristic=heuristic,
color=_next_color(color), color=color,
depth=depth - 1, depth=depth - 1,
)[0], )[0],
move, move,
@ -57,6 +54,8 @@ def _alphabeta(
else: else:
acc = math.inf, None acc = math.inf, None
for move in board.generate_legal_moves(): for move in board.generate_legal_moves():
if Goban.Board.flat_to_name(move) == "PASS":
continue
board.push(move) board.push(move)
value = ( value = (
@ -66,7 +65,7 @@ def _alphabeta(
beta=beta, beta=beta,
move=move, move=move,
heuristic=heuristic, heuristic=heuristic,
color=_next_color(color), color=color,
depth=depth - 1, depth=depth - 1,
)[0], )[0],
move, move,

View File

@ -108,53 +108,6 @@ class GoModel(nn.Module):
return y_mean return y_mean
class GoDataset(Dataset):
    """Dataset of board tensors paired with win-rate labels.

    Each record in *data* is read through the keys ``"black_stones"``,
    ``"white_stones"``, ``"depth"``, ``"black_wins"`` and ``"rollouts"``.

    With ``test=True`` every record yields exactly one sample: the board as
    seen from black's side with the label ``black_wins / rollouts``.

    Otherwise every record yields 16 samples: 2 flips x 4 quarter-turn
    rotations x 2 sides, where the second side swaps the stone lists and uses
    ``1 - black_wins / rollouts`` as its label.
    """

    def __init__(self, data, device, test=False):
        def target(entry, side):
            # Side 0 keeps black's win ratio; the other side gets its complement.
            black_ratio = entry["black_wins"] / entry["rollouts"]
            return black_ratio if side == 0 else 1 - black_ratio

        def position(entry, side, flipped):
            # NOTE(review): the third argument of stones_to_board looks like a
            # "whose turn" flag derived from the depth parity -- confirm
            # against its definition.
            if side == 0:
                grid = stones_to_board(
                    entry["black_stones"],
                    entry["white_stones"],
                    entry["depth"] % 2 == 0,
                )
            else:
                grid = stones_to_board(
                    entry["white_stones"],
                    entry["black_stones"],
                    entry["depth"] % 2 == 1,
                )
            return grid if flipped == 0 else grid.flipud()

        def as_float_tensor(items):
            # Same conversion chain for boards and labels.
            return torch.from_numpy(np.array(items)).float().to(device)

        if test:
            # Evaluation: no augmentation, black's point of view only.
            self.boards = as_float_tensor(
                [position(entry, 0, 0) for entry in data]
            )
            self.labels = as_float_tensor([target(entry, 0) for entry in data])
            return

        axes = [1, 2]
        augmented = []
        for entry in data:
            for flipped in range(2):
                for quarter_turns in range(4):
                    for side in range(2):
                        augmented.append(
                            torch.rot90(
                                position(entry, side, flipped),
                                quarter_turns,
                                axes,
                            )
                        )
        self.boards = as_float_tensor(augmented)

        # Labels depend only on the record and the side; `side` is the
        # innermost loop here as well, so the 4 x 2 repetitions keep labels
        # aligned with the boards built above.
        targets = []
        for entry in data:
            for _ in range(4 * 2):
                for side in range(2):
                    targets.append(target(entry, side))
        self.labels = as_float_tensor(targets)

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.boards)

    def __getitem__(self, i):
        """Return the ``(board, label)`` pair stored at index *i*."""
        return self.boards[i], self.labels[i]
class myPlayer(PlayerInterface): class myPlayer(PlayerInterface):
""" """
Example of a random player for the go. The only tricky part is to be able to handle Example of a random player for the go. The only tricky part is to be able to handle
@ -173,6 +126,10 @@ class myPlayer(PlayerInterface):
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device) checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
self.model.load_state_dict(checkpoint["model_state_dict"]) self.model.load_state_dict(checkpoint["model_state_dict"])
self.last_op_move = None
self.maxtime = 1800
self.time = 0
def getPlayerName(self): def getPlayerName(self):
return "xXx_7h3_5cRuM_M45T3r_xXx" return "xXx_7h3_5cRuM_M45T3r_xXx"
@ -204,6 +161,7 @@ class myPlayer(PlayerInterface):
return 1 - prediction return 1 - prediction
def getPlayerMove(self): def getPlayerMove(self):
st = time.time()
if self._board.is_game_over(): if self._board.is_game_over():
print("Referee told me to play but the game is over!") print("Referee told me to play but the game is over!")
return "PASS" return "PASS"
@ -211,25 +169,30 @@ class myPlayer(PlayerInterface):
duration = 1. duration = 1.
if self._board._nbBLACK + self._board._nbWHITE < 10: if self._board._nbBLACK + self._board._nbWHITE < 10:
max_depth = 1 duration = 3
elif self._board._nbBLACK + self._board._nbWHITE < 20: elif self._board._nbBLACK + self._board._nbWHITE < 30:
max_depth = 2 duration = 5
elif self._board._nbBLACK + self._board._nbWHITE < 40: elif self._board._nbBLACK + self._board._nbWHITE > 40:
max_depth = 3
else:
duration = 64 - (self._board._nbBLACK + self._board._nbWHITE) duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
max_depth = 24
duration = min(duration, (self.maxtime - self.time) / 10)
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1) # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
if self.last_op_move == "PASS" and self._board.diff_stones_board() * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
move = -1
score = math.inf
else:
move, score = IDDFS( move, score = IDDFS(
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=max_depth self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
) )
self._board.push(move) self._board.push(move)
print(move, score, file=stderr) print(move, score, file=stderr)
nd = time.time()
self.time += (nd - st)
# New here: allows to consider internal representations of moves # New here: allows to consider internal representations of moves
# move is an internal representation. To communicate with the interface I need to change if to a string # move is an internal representation. To communicate with the interface I need to change if to a string
@ -239,6 +202,7 @@ class myPlayer(PlayerInterface):
print("Opponent played ", move) # New here print("Opponent played ", move) # New here
# the board needs an internal represetation to push the move. Not a string # the board needs an internal represetation to push the move. Not a string
self._board.push(Goban.Board.name_to_flat(move)) self._board.push(Goban.Board.name_to_flat(move))
self.last_op_move = move
def newGame(self, color): def newGame(self, color):
self._mycolor = color self._mycolor = color