random

2025-05-18 13:40:54 +02:00 · 2025-05-18 13:40:54 +02:00 · b657036355
commit b657036355
parent 6b69bbc116
2 changed files with 51 additions and 23 deletions
--- a/go_player/moveSearch.py
+++ b/go_player/moveSearch.py
@@ -17,15 +17,19 @@ def _alphabeta(
    alpha=-math.inf,
    beta=math.inf,
    depth: int = 3,
+    shouldStop = lambda: False
 ) -> tuple[float, Any]:

-    wantMax = (board.next_player != color)
+    wantMax = (board.next_player == color)
    if depth == 0 or board.is_game_over():
        return heuristic(board, color), move

    if wantMax:
        acc = -math.inf, None
        for move in board.generate_legal_moves():
+            if Goban.Board.flat_to_name(move) == "PASS":
+                continue
+
            board.push(move)
            value = (
                _alphabeta(
@@ -46,13 +50,14 @@ def _alphabeta(
            )
            board.pop()

-            if acc[0] >= beta:
+            if shouldStop() or acc[0] >= beta:
                break  # beta cutoff
-            alpha = max(alpha, value[0])
+            alpha = max(alpha, acc[0])

    else:
        acc = math.inf, None
        for move in board.generate_legal_moves():
+
            board.push(move)
            value = (
                _alphabeta(
@@ -73,9 +78,9 @@ def _alphabeta(
            )
            board.pop()

-            if acc[0] <= alpha:
+            if shouldStop() or acc[0] <= alpha:
                break  # alpha cutoff
-            beta = min(beta, value[0])
+            beta = min(beta, acc[0])

    return acc

@@ -92,15 +97,30 @@ def alphabeta(

 def IDDFS(board: Goban.Board, heuristic, color, duration: float, maxdepth=42):
    st = time.time()
-    depth = 1
+    shouldStop = (lambda: time.time() - st > duration)
+    depth = 0
    move = -1
+    score = -1

-    while time.time() - st < duration and depth <= maxdepth:
-        print("depth:", depth, time.time() - st, file=stderr)
-        move = _alphabeta(
-            board, heuristic, color, move=-1, alpha=-10, beta=10, depth=depth
-        )[1]
+    while not shouldStop() and depth <= maxdepth:
+        if depth % 2 == 0:
+            score, move = _alphabeta(
+                    board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
+            )
+
+            if score == math.inf:
+                return move, score
+
+        else:
+            score, move = _alphabeta(
+                board, heuristic, color, move=move, alpha=-math.inf, beta=math.inf, depth=depth, shouldStop=shouldStop
+            )
+
+            if score == -math.inf:
+                return move, score
+
+        print("depth:", depth, time.time() - st, score,  file=stderr)
        depth += 1

    print(time.time() - st, duration, depth, file=stderr)
-    return move
+    return move, score
--- a/go_player/myPlayer.py
+++ b/go_player/myPlayer.py
@@ -19,11 +19,9 @@ import numpy as np
 from torch.utils.data import Dataset

 def setup_device():
-    torch.set_float32_matmul_precision("medium")
    # Allows to use the GPU if available
    if torch.cuda.is_available():
        device = torch.device("cuda")
-        torch.backends.cuda.matmul.allow_tf32 = True
    elif torch.backends.mps.is_available():
        device = torch.device("mps")
    else:
@@ -167,7 +165,6 @@ class myPlayer(PlayerInterface):
    def __init__(self):
        self._board = Goban.Board()
        self._mycolor = None
-        self.moveCount = 0

        self.device = setup_device()
        print(self.device)
@@ -190,13 +187,16 @@ class myPlayer(PlayerInterface):

    def nnheuristic(self, board: Goban.Board, color):
        if board.is_game_over():
+            if board.winner() == board._EMPTY:
+                return 0.5
+
            return math.inf if board.winner() == color else -math.inf

        go_board = torch.from_numpy(np.array([goban2Go(board)])).float().to(self.device)

        self.model.eval()
        with torch.no_grad():
-            prediction = self.model(go_board)
+            prediction = self.model(go_board).item()

        if color == Goban.Board._BLACK:
            return prediction
@@ -208,33 +208,41 @@ class myPlayer(PlayerInterface):
            print("Referee told me to play but the game is over!")
            return "PASS"

-        if self.moveCount < 40:
+        duration = 1.
+
+        if self._board._nbBLACK + self._board._nbWHITE < 10:
            max_depth = 1
+
+        elif self._board._nbBLACK + self._board._nbWHITE < 20:
+            max_depth = 2
+
+        elif self._board._nbBLACK + self._board._nbWHITE < 40:
+            max_depth = 3
+
        else:
-            max_depth = 5
+            duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
+            max_depth = 24

        # move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
-        move = IDDFS(
-            self._board, self.nnheuristic, self._mycolor, duration=1., maxdepth=max_depth
+        move, score = IDDFS(
+            self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=max_depth
        )

        self._board.push(move)
+        print(move, score, file=stderr)

        # New here: allows to consider internal representations of moves
        # move is an internal representation. To communicate with the interface I need to change if to a string
-        self.moveCount += 1 if Goban.Board.flat_to_name(move) != "PASS" else 0
        return Goban.Board.flat_to_name(move)

    def playOpponentMove(self, move):
        print("Opponent played ", move)  # New here
        # the board needs an internal represetation to push the move.  Not a string
-        self.moveCount += 1 if move != "PASS" else 0
        self._board.push(Goban.Board.name_to_flat(move))

    def newGame(self, color):
        self._mycolor = color
        self._opponent = Goban.Board.flip(color)
-        self.moveCount = 0

    def endGame(self, winner):
        if self._mycolor == winner: