Search code examples
pythonminimaxtic-tac-toe

Minimax explained for an idiot


I've wasted my entire day trying to use the minimax algorithm to make an unbeatable tictactoe AI. I missed something along the way (brain fried).

I'm not looking for code here, just a better explanation of where I went wrong.

Here is my current code (the minimax method always returns 0 for some reason):

from copy import deepcopy


class Square(object):
    def __init__(self, player=None):
        self.player = player
    
    @property
    def empty(self):
        return self.player is None


class Board(object):
    winning_combos = (
        [0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 3, 6], [1, 4, 7], [2, 5, 8],
        [0, 4, 8], [2, 4, 6],
    )
    
    def __init__(self, squares={}):
        self.squares = squares
        for i in range(9):
            if self.squares.get(i) is None:
                self.squares[i] = Square()
    
    @property
    def available_moves(self):
        return [k for k, v in self.squares.iteritems() if v.empty]
    
    @property
    def complete(self):
        for combo in self.winning_combos:
            combo_available = True
            for pos in combo:
                if not pos in self.available_moves:
                    combo_available = False
            if combo_available:
                return self.winner is not None
        return True
    
    @property
    def player_won(self):
        return self.winner == 'X'
    
    @property
    def computer_won(self):
        return self.winner == 'O'
    
    @property
    def tied(self):
        return self.complete == True and self.winner is None
    
    @property
    def winner(self):
        for player in ('X', 'O'):
            positions = self.get_squares(player)
            for combo in self.winning_combos:
                win = True
                for pos in combo:
                    if pos not in positions:
                        win = False
                if win:
                    return player
        return None
    
    @property
    def heuristic(self):
        if self.player_won:
            return -1
        elif self.tied:
            return 0
        elif self.computer_won:
            return 1
    
    def get_squares(self, player):
        return [k for k,v in self.squares.iteritems() if v.player == player]
    
    def make_move(self, position, player):
        self.squares[position] = Square(player)
    
    def minimax(self, node, player):
        if node.complete:
            return node.heuristic
        a = -1e10000
        for move in node.available_moves:
            child = deepcopy(node)
            child.make_move(move, player)
            a = max([a, -self.minimax(child, get_enemy(player))])
        return a


def get_enemy(player):
    if player == 'X':
        return 'O'
    return 'X'

Solution

  • Your complete function is not working as expected, causing games to be declared tied before anything can happen. For instance, consider this setup:

    >> oWinning = {
     1: Square('X'),
     3: Square('O'), 4: Square('X'),
     6: Square('O'), 8: Square('X'),
    }
    >> nb = Board(oWinning)
    >> nb.complete
    True
    >> nb.tied
    True
    

    This should be a win for the computer on the next move. Instead, it says the game is tied.

    The problem is that your logic in complete, right now, checks to see if all of the squares in a combo are free. If any of them are not, it presumes that that combo can't be won with. What it needs to do is check if any positions in that combo are occupied, and so long as all of those combos are either None or the same player, that combo should be considered still available.

    e.g.

    def available_combos(self, player):
        return self.available_moves + self.get_squares(player)
    
    @property
    def complete(self):
        for player in ('X', 'O'):
            for combo in self.winning_combos:
                combo_available = True
                for pos in combo:
                    if not pos in self.available_combos(player):
                       combo_available = False
                if combo_available:
                    return self.winner is not None
        return True
    

    Now that I properly tested this with the updated code I'm getting the expected result on this test case:

    >>> nb.minimax(nb, 'O')
    -1
    >>> nb.minimax(nb, 'X')
    1