enhance

2025-05-18 14:00:25 +02:00 · 2025-05-18 14:00:25 +02:00 · 96b3815280
commit 96b3815280
parent b657036355
2 changed files with 27 additions and 64 deletions
--- a/go_player/moveSearch.py
+++ b/go_player/moveSearch.py
@@ -5,9 +5,6 @@ from typing import Any, Callable
 import Goban


-def _next_color(color):
-    return Goban.Board._BLACK if color == Goban.Board._WHITE else Goban.Board._WHITE
-
 # Returns heuristic, move
 def _alphabeta(
    board: Goban.Board,
@@ -22,7 +19,7 @@ def _alphabeta(

    wantMax = (board.next_player == color)
    if depth == 0 or board.is_game_over():
-        return heuristic(board, color), move
+        return heuristic(board, board.next_player()), move

    if wantMax:
        acc = -math.inf, None
@@ -38,7 +35,7 @@ def _alphabeta(
                    beta=beta,
                    move=move,
                    heuristic=heuristic,
-                    color=_next_color(color),
+                    color=color,
                    depth=depth - 1,
                )[0],
                move,
@@ -57,6 +54,8 @@ def _alphabeta(
    else:
        acc = math.inf, None
        for move in board.generate_legal_moves():
+            if Goban.Board.flat_to_name(move) == "PASS":
+                continue

            board.push(move)
            value = (
@@ -66,7 +65,7 @@ def _alphabeta(
                    beta=beta,
                    move=move,
                    heuristic=heuristic,
-                    color=_next_color(color),
+                    color=color,
                    depth=depth - 1,
                )[0],
                move,
--- a/go_player/myPlayer.py
+++ b/go_player/myPlayer.py
@@ -108,53 +108,6 @@ class GoModel(nn.Module):
            return y_mean


-class GoDataset(Dataset):
-    def __init__(self, data, device, test=False):
-        def label(d, j):
-            if j == 0:
-                return d["black_wins"] / d["rollouts"]
-            else:
-                return 1 - label(d, 0)
-
-        def board(d, j, k):
-            if j == 0:
-                out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0)
-            else:
-                out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1)
-
-            if k == 0:
-                return out
-            else:
-                return out.flipud()
-
-        if test:
-            dims = [1, 2]
-            self.boards = torch.from_numpy(np.array([
-                board(d, 0, 0) for d in data
-            ])).float().to(device)
-            self.labels = torch.from_numpy(np.array(
-                [label(d, 0) for d in data],
-            )).float().to(device)
-        else:
-            dims = [1, 2]
-            self.boards = torch.from_numpy(np.array([
-                torch.rot90(board(d, j, k), i, dims)
-                for d in data
-                for k in range(2)
-                for i in range(4)
-                for j in range(2)
-            ])).float().to(device)
-            self.labels = torch.from_numpy(np.array(
-                [label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)],
-            )).float().to(device)
-
-    def __len__(self):
-        return len(self.boards)
-
-    def __getitem__(self, i):
-        return self.boards[i], self.labels[i]
-
-
 class myPlayer(PlayerInterface):
    """
    Example of a random player for the go. The only tricky part is to be able to handle
@@ -173,6 +126,10 @@ class myPlayer(PlayerInterface):

        checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
        self.model.load_state_dict(checkpoint["model_state_dict"])
+        self.last_op_move = None
+
+        self.maxtime = 1800
+        self.time = 0

    def getPlayerName(self):
        return "xXx_7h3_5cRuM_M45T3r_xXx"
@@ -204,6 +161,7 @@ class myPlayer(PlayerInterface):
        return 1 - prediction

    def getPlayerMove(self):
+        st = time.time()
        if self._board.is_game_over():
            print("Referee told me to play but the game is over!")
            return "PASS"
@@ -211,25 +169,30 @@ class myPlayer(PlayerInterface):
        duration = 1.

        if self._board._nbBLACK + self._board._nbWHITE < 10:
-            max_depth = 1
+            duration = 3

-        elif self._board._nbBLACK + self._board._nbWHITE < 20:
-            max_depth = 2
+        elif self._board._nbBLACK + self._board._nbWHITE < 30:
+            duration = 5

-        elif self._board._nbBLACK + self._board._nbWHITE < 40:
-            max_depth = 3
-
-        else:
+        elif self._board._nbBLACK + self._board._nbWHITE > 40:
            duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
-            max_depth = 24
+
+        duration = min(duration, (self.maxtime - self.time) / 10)

        # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
-        move, score = IDDFS(
-            self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=max_depth
-        )
+        if self.last_op_move == "PASS" and self._board.diff_stones_board() * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
+            move = -1
+            score = math.inf
+
+        else:
+            move, score = IDDFS(
+                self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
+            )

        self._board.push(move)
        print(move, score, file=stderr)
+        nd = time.time()
+        self.time += (nd - st)

        # New here: allows to consider internal representations of moves
         # move is an internal representation. To communicate with the interface I need to change it to a string
@@ -239,6 +202,7 @@ class myPlayer(PlayerInterface):
        print("Opponent played ", move)  # New here
         # the board needs an internal representation to push the move, not a string
        self._board.push(Goban.Board.name_to_flat(move))
+        self.last_op_move = move

    def newGame(self, color):
        self._mycolor = color