enhance
This commit is contained in:
parent
b657036355
commit
96b3815280
@ -5,9 +5,6 @@ from typing import Any, Callable
|
|||||||
import Goban
|
import Goban
|
||||||
|
|
||||||
|
|
||||||
def _next_color(color):
|
|
||||||
return Goban.Board._BLACK if color == Goban.Board._WHITE else Goban.Board._WHITE
|
|
||||||
|
|
||||||
# Returns heuristic, move
|
# Returns heuristic, move
|
||||||
def _alphabeta(
|
def _alphabeta(
|
||||||
board: Goban.Board,
|
board: Goban.Board,
|
||||||
@ -22,7 +19,7 @@ def _alphabeta(
|
|||||||
|
|
||||||
wantMax = (board.next_player == color)
|
wantMax = (board.next_player == color)
|
||||||
if depth == 0 or board.is_game_over():
|
if depth == 0 or board.is_game_over():
|
||||||
return heuristic(board, color), move
|
return heuristic(board, board.next_player()), move
|
||||||
|
|
||||||
if wantMax:
|
if wantMax:
|
||||||
acc = -math.inf, None
|
acc = -math.inf, None
|
||||||
@ -38,7 +35,7 @@ def _alphabeta(
|
|||||||
beta=beta,
|
beta=beta,
|
||||||
move=move,
|
move=move,
|
||||||
heuristic=heuristic,
|
heuristic=heuristic,
|
||||||
color=_next_color(color),
|
color=color,
|
||||||
depth=depth - 1,
|
depth=depth - 1,
|
||||||
)[0],
|
)[0],
|
||||||
move,
|
move,
|
||||||
@ -57,6 +54,8 @@ def _alphabeta(
|
|||||||
else:
|
else:
|
||||||
acc = math.inf, None
|
acc = math.inf, None
|
||||||
for move in board.generate_legal_moves():
|
for move in board.generate_legal_moves():
|
||||||
|
if Goban.Board.flat_to_name(move) == "PASS":
|
||||||
|
continue
|
||||||
|
|
||||||
board.push(move)
|
board.push(move)
|
||||||
value = (
|
value = (
|
||||||
@ -66,7 +65,7 @@ def _alphabeta(
|
|||||||
beta=beta,
|
beta=beta,
|
||||||
move=move,
|
move=move,
|
||||||
heuristic=heuristic,
|
heuristic=heuristic,
|
||||||
color=_next_color(color),
|
color=color,
|
||||||
depth=depth - 1,
|
depth=depth - 1,
|
||||||
)[0],
|
)[0],
|
||||||
move,
|
move,
|
||||||
|
@ -108,53 +108,6 @@ class GoModel(nn.Module):
|
|||||||
return y_mean
|
return y_mean
|
||||||
|
|
||||||
|
|
||||||
class GoDataset(Dataset):
|
|
||||||
def __init__(self, data, device, test=False):
|
|
||||||
def label(d, j):
|
|
||||||
if j == 0:
|
|
||||||
return d["black_wins"] / d["rollouts"]
|
|
||||||
else:
|
|
||||||
return 1 - label(d, 0)
|
|
||||||
|
|
||||||
def board(d, j, k):
|
|
||||||
if j == 0:
|
|
||||||
out = stones_to_board(d["black_stones"], d["white_stones"], d["depth"] % 2 == 0)
|
|
||||||
else:
|
|
||||||
out = stones_to_board(d["white_stones"], d["black_stones"], d["depth"] % 2 == 1)
|
|
||||||
|
|
||||||
if k == 0:
|
|
||||||
return out
|
|
||||||
else:
|
|
||||||
return out.flipud()
|
|
||||||
|
|
||||||
if test:
|
|
||||||
dims = [1, 2]
|
|
||||||
self.boards = torch.from_numpy(np.array([
|
|
||||||
board(d, 0, 0) for d in data
|
|
||||||
])).float().to(device)
|
|
||||||
self.labels = torch.from_numpy(np.array(
|
|
||||||
[label(d, 0) for d in data],
|
|
||||||
)).float().to(device)
|
|
||||||
else:
|
|
||||||
dims = [1, 2]
|
|
||||||
self.boards = torch.from_numpy(np.array([
|
|
||||||
torch.rot90(board(d, j, k), i, dims)
|
|
||||||
for d in data
|
|
||||||
for k in range(2)
|
|
||||||
for i in range(4)
|
|
||||||
for j in range(2)
|
|
||||||
])).float().to(device)
|
|
||||||
self.labels = torch.from_numpy(np.array(
|
|
||||||
[label(d, j) for d in data for _ in range(4) for _k in range(2) for j in range(2)],
|
|
||||||
)).float().to(device)
|
|
||||||
|
|
||||||
def __len__(self):
|
|
||||||
return len(self.boards)
|
|
||||||
|
|
||||||
def __getitem__(self, i):
|
|
||||||
return self.boards[i], self.labels[i]
|
|
||||||
|
|
||||||
|
|
||||||
class myPlayer(PlayerInterface):
|
class myPlayer(PlayerInterface):
|
||||||
"""
|
"""
|
||||||
Example of a random player for the go. The only tricky part is to be able to handle
|
Example of a random player for the go. The only tricky part is to be able to handle
|
||||||
@ -173,6 +126,10 @@ class myPlayer(PlayerInterface):
|
|||||||
|
|
||||||
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
|
checkpoint = torch.load("scrum.pt", weights_only=True, map_location=self.device)
|
||||||
self.model.load_state_dict(checkpoint["model_state_dict"])
|
self.model.load_state_dict(checkpoint["model_state_dict"])
|
||||||
|
self.last_op_move = None
|
||||||
|
|
||||||
|
self.maxtime = 1800
|
||||||
|
self.time = 0
|
||||||
|
|
||||||
def getPlayerName(self):
|
def getPlayerName(self):
|
||||||
return "xXx_7h3_5cRuM_M45T3r_xXx"
|
return "xXx_7h3_5cRuM_M45T3r_xXx"
|
||||||
@ -204,6 +161,7 @@ class myPlayer(PlayerInterface):
|
|||||||
return 1 - prediction
|
return 1 - prediction
|
||||||
|
|
||||||
def getPlayerMove(self):
|
def getPlayerMove(self):
|
||||||
|
st = time.time()
|
||||||
if self._board.is_game_over():
|
if self._board.is_game_over():
|
||||||
print("Referee told me to play but the game is over!")
|
print("Referee told me to play but the game is over!")
|
||||||
return "PASS"
|
return "PASS"
|
||||||
@ -211,25 +169,30 @@ class myPlayer(PlayerInterface):
|
|||||||
duration = 1.
|
duration = 1.
|
||||||
|
|
||||||
if self._board._nbBLACK + self._board._nbWHITE < 10:
|
if self._board._nbBLACK + self._board._nbWHITE < 10:
|
||||||
max_depth = 1
|
duration = 3
|
||||||
|
|
||||||
elif self._board._nbBLACK + self._board._nbWHITE < 20:
|
elif self._board._nbBLACK + self._board._nbWHITE < 30:
|
||||||
max_depth = 2
|
duration = 5
|
||||||
|
|
||||||
elif self._board._nbBLACK + self._board._nbWHITE < 40:
|
elif self._board._nbBLACK + self._board._nbWHITE > 40:
|
||||||
max_depth = 3
|
|
||||||
|
|
||||||
else:
|
|
||||||
duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
|
duration = 64 - (self._board._nbBLACK + self._board._nbWHITE)
|
||||||
max_depth = 24
|
|
||||||
|
duration = min(duration, (self.maxtime - self.time) / 10)
|
||||||
|
|
||||||
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
|
# move = alphabeta(self._board, self.nnheuristic, self._mycolor, 1)
|
||||||
|
if self.last_op_move == "PASS" and self._board.diff_stones_board() * (1 if self._mycolor == Goban.Board._BLACK else -1) > 0:
|
||||||
|
move = -1
|
||||||
|
score = math.inf
|
||||||
|
|
||||||
|
else:
|
||||||
move, score = IDDFS(
|
move, score = IDDFS(
|
||||||
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=max_depth
|
self._board, self.nnheuristic, self._mycolor, duration=duration, maxdepth=64
|
||||||
)
|
)
|
||||||
|
|
||||||
self._board.push(move)
|
self._board.push(move)
|
||||||
print(move, score, file=stderr)
|
print(move, score, file=stderr)
|
||||||
|
nd = time.time()
|
||||||
|
self.time += (nd - st)
|
||||||
|
|
||||||
# New here: allows to consider internal representations of moves
|
# New here: allows to consider internal representations of moves
|
||||||
# move is an internal representation. To communicate with the interface I need to change if to a string
|
# move is an internal representation. To communicate with the interface I need to change if to a string
|
||||||
@ -239,6 +202,7 @@ class myPlayer(PlayerInterface):
|
|||||||
print("Opponent played ", move) # New here
|
print("Opponent played ", move) # New here
|
||||||
# the board needs an internal represetation to push the move. Not a string
|
# the board needs an internal represetation to push the move. Not a string
|
||||||
self._board.push(Goban.Board.name_to_flat(move))
|
self._board.push(Goban.Board.name_to_flat(move))
|
||||||
|
self.last_op_move = move
|
||||||
|
|
||||||
def newGame(self, color):
|
def newGame(self, color):
|
||||||
self._mycolor = color
|
self._mycolor = color
|
||||||
|
Loading…
x
Reference in New Issue
Block a user