python artificial-intelligence minimax alpha-beta-pruning 2048

2048 game - AI can't score more that 256 average

I'm trying to implement AI for 2048 with MiniMax and Alpha-Beta pruning, based on a snake strategy (see this paper), which seems to be the best as a single heuristics.

Unfortunately, AI makes 256 in most games, what is not much better than empty cells heuristics. I've already read related topics here, but can't find solution myself.

Here is the code:

import math
from BaseAI_3 import BaseAI

INF_P = math.inf

class PlayerAI(BaseAI):
    move_str = {
        0: "UP",
        1: "DOWN",
        2: "LEFT",
        3: "RIGHT"
    }

    def __init__(self):
        super().__init__()
        self.depth_max = 4

    def getMove(self, grid):
        move_direction, state, utility = self.decision(grid)
        act_move = moves.index(move_direction)
        return moves[act_move] if moves else None

    def get_children(self, grid):
        grid.children = []
        for move_direction in grid.getAvailableMoves():
            gridCopy = grid.clone()
            gridCopy.path = grid.path[:]
            gridCopy.path.append(PlayerAI.move_str[move_direction])
            gridCopy.move(move_direction)
            gridCopy.depth_current = grid.depth_current + 1
            grid.children.append((move_direction, gridCopy))
        return grid.children

    def utility(self, state):

        def snake():
            poses = [
                [
                    [2 ** 15, 2 ** 14, 2 ** 13, 2 ** 12],
                    [2 ** 8, 2 ** 9, 2 ** 10, 2 ** 11],
                    [2 ** 7, 2 ** 6, 2 ** 5, 2 ** 4],
                    [2 ** 0, 2 ** 1, 2 ** 2, 2 ** 3]
                ]
                ,
                [
                   [2 ** 15, 2 ** 8, 2 ** 7, 2 ** 0],
                   [2 ** 14, 2 ** 9, 2 ** 6, 2 ** 1],
                   [2 ** 13, 2 ** 10, 2 ** 5, 2 ** 2],
                   [2 ** 12, 2 ** 11, 2 ** 4, 2 ** 3]
                ]
            ]

            poses.append([item for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in reversed(poses[0])])
            poses.append([list(reversed(item)) for item in poses[0]])

            poses.append([item for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in reversed(poses[1])])
            poses.append([list(reversed(item)) for item in poses[1]])

            max_value = -INF_P
            for pos in poses:
                value = 0
                for i in range(state.size):
                    for j in range(state.size):
                        value += state.map[i][j] * pos[i][j]

                if value > max_value:
                    max_value = value

            return max_value

        weight_snake = 1 / (2 ** 13)

        value = (
            weight_snake * snake(),
        )

        return value

    def decision(self, state):
        state.depth_current = 1
        state.path = []
        return self.maximize(state, -INF_P, INF_P)

    def terminal_state(self, state):
        return state.depth_current >= self.depth_max

    def maximize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))

        max_move_direction, max_child, max_utility = None, None, (-INF_P, )
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.minimize(child, alpha, beta)
            child.utility = utility

            if sum(utility) > sum(max_utility):
                max_move_direction, max_child, max_utility = move_direction, child, utility

            if sum(max_utility) >= beta:
                break

            if sum(max_utility) > alpha:
                alpha = sum(max_utility)

        state.utility = max_utility
        state.alpha = alpha
        state.beta = beta

        return max_move_direction, max_child, max_utility

    def minimize(self, state, alpha, beta):
        # terminal-state check
        if self.terminal_state(state):
            return (None, state, self.utility(state))

        min_move_direction, min_child, min_utility = None, None, (INF_P, )
        for move_direction, child in self.get_children(state):
            _, state2, utility = self.maximize(child, alpha, beta)
            child.utility = utility

            if sum(utility) < sum(min_utility):
                min_move_direction, min_child, min_utility = move_direction, child, utility

            if sum(min_utility) <= alpha:
                break

            if sum(min_utility) < beta:
                beta = sum(min_utility)

        state.utility = min_utility
        state.alpha = alpha
        state.beta = beta

        return min_move_direction, min_child, min_utility

grid is an object, grid.map is a two-dimentional array (list of lists).

Do I have any mistakes? How can I imporove the code?

Added game log: https://pastebin.com/eyzgU2dN

Solution

On the past weekend I've realized that algorithm was not properly implemented. A mistake was in the minimize() function, where I search for children in a wrong way - it should be like this:

def get_opponent_children(self, grid):
    grid.children = []
    for x in range(grid.size):
        for y in range(grid.size):
            if grid.map[x][y] == 0:
                for c in (2, 4):
                    gridCopy = grid.clone()
                    gridCopy.path = grid.path[:]
                    gridCopy.deep_current = grid.deep_current + 1
                    gridCopy.map[x][y] = c
                    grid.children.append((None, gridCopy))

    return grid.children

and corresponding change:

for move_direction, child in self.get_opponent_children(state):

Now it's ok to hit 1024 and 2048 most of time.