Search code examples
pythonpython-3.xalgorithmminimaxalpha-beta-pruning

minimax losing only in one case of tic-tac-toe


I have a minimax function which works fine other times. But when I implement both alpha beta pruning and hash table it loses only in one situation. If I apply one of the optimizations or none then works fine all cases. That losing case only occurs when the ai goes second:

I go 9, 1, 3, 6.

ai go 5, 7, 2.

 X | O | X 
---+---+---
 4 | O | X 
---+---+---
 O | 8 | X

ai: O

function:


def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score

.

If someone needed, I added all the code to run the game below:

core_func.py

import variables
import turns_func


def update_grid():
    print(f' {variables.grid[0]} | {variables.grid[1]} | {variables.grid[2]} \n'
          f'---+---+---\n'
          f' {variables.grid[3]} | {variables.grid[4]} | {variables.grid[5]} \n'
          f'---+---+---\n'
          f' {variables.grid[6]} | {variables.grid[7]} | {variables.grid[8]} \n')


def correct_input(message, lst):
    i = int(input(f'{message}: '))
    if i not in lst:
        print(f'input error: input is not valid. valid inputs: {lst}')
        return correct_input(message, lst)
    return i


def initialization():
    option = correct_input(variables.initialization_message, variables.valid_initials)
    if option in {1, 4, 5}:
        variables.turn = 'H'
    else:
        variables.turn = 'C'
    if option in {2, 4}:
        variables.difficulty = 'R'
    if option in {3, 5}:
        variables.difficulty = 'P'
    print('Coordinates of the grid:')
    update_grid()


def check_win():
    for x, y, z in (0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6):
        if variables.grid[x] == variables.grid[y] == variables.grid[z]:
            return True
    return False


def availables():
    availables = []
    for c in variables.grid:
        if c not in variables.valid_symbols:
            availables.append(c)
    return availables


def get_coordinate():
    if not variables.difficulty:
        return turns_func.human_turn()
    elif variables.turn == 'H':
        variables.turn = 'C'
        return turns_func.human_turn()
    else:
        print(f'computer ({variables.symbol}):')
        variables.turn = 'H'
        if variables.difficulty == 'R':
            return turns_func.random_turn()
        else:
            return turns_func.perfect_turn()


def play():
    coordinate = get_coordinate()
    variables.grid[coordinate - 1] = variables.symbol
    update_grid()
    variables.moves += 1
    if check_win():
        print(f'"{variables.symbol}" wins in {variables.moves} moves!')
    elif not availables():
        print('"Game Ties"')
    else:
        changed = variables.valid_symbols - {variables.symbol}
        variables.symbol = changed.pop()
        play()

.

turns_func.py

import variables
import core_func
from random import choice


def human_turn():
    return core_func.correct_input(variables.get_coordinate_message + variables.symbol, core_func.availables())


def random_turn():
    return choice(core_func.availables())


# alpha - maximizer's max - it should be less than its parent node minimizer's min
# beta - minimizer's min - it should be greater than its parent node maximizer's max


def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score


def perfect_turn():
    max_score = -float('inf')
    coordinate = None
    for c in variables.grid:
        if c in variables.valid_symbols: continue
        i = c - 1
        variables.grid[i] = variables.symbol
        score = minimax(1, -float('inf'), float('inf'))
        variables.grid[i] = c
        if score > max_score:
            max_score = score
            coordinate = c
    return coordinate

.

variables.py

initialization_message = ('1. Play with Human\n'
                          'Play with computer:\n'
                          'For computer going first, Difficulties:\n'
                          '\t2. Random\n'
                          '\t3. Perfect\n'
                          'For computer going second, Difficulties:\n'
                          '\t4. Random\n'
                          '\t5. Perfect\n'
                          'Choose an option. Enter the number')

get_coordinate_message = 'Input a coordinate for '

valid_initials = (1, 2, 3, 4, 5)
grid = [1, 2, 3, 4, 5, 6, 7, 8, 9]
valid_symbols = {'X', 'O'}
symbol = 'X'
difficulty = None
turn = None
moves = 0
states = {}

.

main.py

import core_func

core_func.initialization()
core_func.play()

Solution

  • The problem is indeed in the combination of memoization ("hashing") and alpha-beta pruning. The key t you use represents the state of the grid, but does not include the alpha/beta window that is currently in effect. When the value returned by minimax results from an alpha-beta cut (a break), then it is not guaranteed to be an accurate value for the grid, just accurate enough to choose another move higher up the recursion tree, yet if you use memoization, you'll want the value to be based on a complete evaluation (without break), as in another situation (when encountering that state again), the alpha-beta window might be different, which would require a value that results from no early exit (no break).

    So this is why it works when you remove one of the two features: memoization or alpha-beta pruning.

    To have both features activated, you have some options. I would suggest that you don't store a score in your states dict if its evaluation loop was prematurely exited because of pruning.

    Here is your code adapted to do just that:

    def minimax(depth, alpha, beta):
        new_depth = depth + 1
        avails = core_func.availables()
        count = 0 # count the number of evaluated variants
        if new_depth % 2:  # maximizer
            if core_func.check_win():
                return -1  # previously minimizer
            elif not avails:
                return 0
            best_score = -float('inf')
            for c in avails:
                i = c - 1
                variables.grid[i] = variables.symbol
                t = tuple(variables.grid)
                score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
                best_score = max(best_score, score)
                alpha = max(alpha, best_score)
                variables.grid[i] = c
                count += 1 # update counter
                if beta <= alpha:
                    break
        else:  # minimizer
            opponent = variables.valid_symbols - {variables.symbol}
            opponent = opponent.pop()
            if core_func.check_win():
                return 1  # previously maximizer
            elif not avails:
                return 0
            best_score = float('inf')
            for c in avails:
                i = c - 1
                variables.grid[i] = opponent
                t = tuple(variables.grid)
                score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
                best_score = min(best_score, score)
                beta = min(beta, best_score)
                variables.grid[i] = c
                count += 1 # update counter
                if beta <= alpha:
                    break
        if count == len(avails):  # We did a complete evaluation
            # Only then update variables.state
            t = tuple(variables.grid)
            variables.states[t] = best_score
        return best_score