I'm trying to implement AI for 2048 with MiniMax and Alpha-Beta pruning, based on a snake strategy (see this paper), which seems to be the best as a single heuristics.
Unfortunately, AI makes 256 in most games, what is not much better than empty cells heuristics. I've already read related topics here, but can't find solution myself.
Here is the code:
import math
from BaseAI_3 import BaseAI
INF_P = math.inf
class PlayerAI(BaseAI):
move_str = {
0: "UP",
1: "DOWN",
2: "LEFT",
3: "RIGHT"
}
def __init__(self):
super().__init__()
self.depth_max = 4
def getMove(self, grid):
move_direction, state, utility = self.decision(grid)
act_move = moves.index(move_direction)
return moves[act_move] if moves else None
def get_children(self, grid):
grid.children = []
for move_direction in grid.getAvailableMoves():
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.path.append(PlayerAI.move_str[move_direction])
gridCopy.move(move_direction)
gridCopy.depth_current = grid.depth_current + 1
grid.children.append((move_direction, gridCopy))
return grid.children
def utility(self, state):
def snake():
poses = [
[
[2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
[2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
[2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
[2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
]
,
[
[2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
[2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
[2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
[2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
]
]
poses.append([item for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in reversed(poses[0])])
poses.append([list(reversed(item)) for item in poses[0]])
poses.append([item for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in reversed(poses[1])])
poses.append([list(reversed(item)) for item in poses[1]])
max_value = -INF_P
for pos in poses:
value = 0
for i in range(state.size):
for j in range(state.size):
value += state.map[i][j] * pos[i][j]
if value > max_value:
max_value = value
return max_value
weight_snake = 1 / (2 ** 13)
value = (
weight_snake * snake(),
)
return value
def decision(self, state):
state.depth_current = 1
state.path = []
return self.maximize(state, -INF_P, INF_P)
def terminal_state(self, state):
return state.depth_current >= self.depth_max
def maximize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
max_move_direction, max_child, max_utility = None, None, (-INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.minimize(child, alpha, beta)
child.utility = utility
if sum(utility) > sum(max_utility):
max_move_direction, max_child, max_utility = move_direction, child, utility
if sum(max_utility) >= beta:
break
if sum(max_utility) > alpha:
alpha = sum(max_utility)
state.utility = max_utility
state.alpha = alpha
state.beta = beta
return max_move_direction, max_child, max_utility
def minimize(self, state, alpha, beta):
# terminal-state check
if self.terminal_state(state):
return (None, state, self.utility(state))
min_move_direction, min_child, min_utility = None, None, (INF_P, )
for move_direction, child in self.get_children(state):
_, state2, utility = self.maximize(child, alpha, beta)
child.utility = utility
if sum(utility) < sum(min_utility):
min_move_direction, min_child, min_utility = move_direction, child, utility
if sum(min_utility) <= alpha:
break
if sum(min_utility) < beta:
beta = sum(min_utility)
state.utility = min_utility
state.alpha = alpha
state.beta = beta
return min_move_direction, min_child, min_utility
grid
is an object, grid.map
is a two-dimentional array (list of lists).
Do I have any mistakes? How can I imporove the code?
Added game log: https://pastebin.com/eyzgU2dN
On the past weekend I've realized that algorithm was not properly implemented. A mistake was in the minimize()
function, where I search for children in a wrong way - it should be like this:
def get_opponent_children(self, grid):
grid.children = []
for x in range(grid.size):
for y in range(grid.size):
if grid.map[x][y] == 0:
for c in (2, 4):
gridCopy = grid.clone()
gridCopy.path = grid.path[:]
gridCopy.deep_current = grid.deep_current + 1
gridCopy.map[x][y] = c
grid.children.append((None, gridCopy))
return grid.children
and corresponding change:
for move_direction, child in self.get_opponent_children(state):
Now it's ok to hit 1024 and 2048 most of time.