Search code examples
pythonpython-3.xminimax

minimax function not generating all game states


Can anyone tell me why my code prints 1 and not 8? It seems to not be going through very single state. Why is that?

using the minimax algorithm find the best possible move to make based on a game state, a possible tic tac toe board. Usually, it would branch off into a large tree of game states, each new branch called when the game doesn't end on an ending state, repeated, then finding the best possible move by recursively going down the tree finding the best moves for each player.

I was following the "tutorial" at http://giocc.com/concise-implementation-of-minimax-through-higher-order-functions.html.

My code:

#!/usr/bin/env python3
'''Minimax finds the best possible moves by applying a set of rules.
A win = 1, tie = 0, loss = -1 (for us). Assuming that each player chooses the best move
(we choose 1 if possible, opponent chooses -1). Starting at the top of a 'game tree',
generate the possible moves we can make. If It reaches a terminal state, stop. Otherwise keep searching in depth.
We find max.
'''
#[0,1,2,3,4,5,6,7,8]
class GameState: #a game state is a certain state of the board
    #http://stackoverflow.com/questions/1537202/variables-inside-and-outside-of-a-class-init-function
    x_went_first = True
    def __init__(self,board):
        self.board = board
        self.winning_combos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]
    def is_gameover(self):
        if self.board.count('X') + self.board.count('O') == 9:
            return True
        for combo in self.winning_combos:
            if (self.board[combo[0]] == 'X' and self.board[combo[1]] == 'X' and self.board[combo[2]] == 'X') or (self.board[combo[0]] == 'O' and self.board[combo[1]] == 'O' and self.board[combo[2]] == 'O'):
                return True
        return False
    def get_possible_moves(self):
        squares = []
        for square in self.board:
            if square != 'X' and square != 'O':
                squares.append(int(square))
        return squares
    def get_next_state(self, move):
        copy = self.board
        num_of_x = copy.count('X')
        num_of_o = copy.count('O')
        #x starts, o's turn 1 > 0 o's turn
        #o starts, x's turn 1 < 0 x's turn
        #x starts, x's turn 1 > 1 
        #o starts, o's turn 1 < 1
        if (self.x_went_first and num_of_x > num_of_o) or (self.x_went_first is not True and num_of_o == num_of_x):
            copy[move] = 'O'
        else:
            copy[move] = 'X'
        return GameState(copy)

def evals(game_state):
    for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,8]]:
        if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
            return 1    
        elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
            return -1
        else:
            return 0



def min_play(game_state):
    if game_state.is_gameover():
        return evals(game_state)
    moves = game_state.get_possible_moves()
    best_move = moves[0]
    best_score = 2 #not possible, best score is -1
    for move in moves:
        clone = game_state.get_next_state(move)
        score = max_play(clone)
        if score < best_score:
            best_move = move
            best_score = score
    return best_score

def max_play(game_state):
    if game_state.is_gameover():
        return evals(game_state)
    moves = game_state.get_possible_moves()
    best_score = -2 #not possible, best score is 1
    for move in moves:
        clone = game_state.get_next_state(move)
        score = min_play(clone)
        if score > best_score:
            best_move = move
            best_score = score
    return best_score

def minimax(game_state):
    moves = game_state.get_possible_moves()
    best_move = moves[0]
    best_score = -2
    for move in moves:
        clone = game_state.get_next_state(move)
        score = min_play(clone)
        if score > best_score:
            best_move = move
            best_score = score
    return best_move


game = GameState(['X',1,2,
                    3,'O',5,
                    6,7,8])
print(minimax(game))

Solution

  • My evals was always returning 0, and one of the winning combinations was messed up. New evals:

    def evals:
        for combo in [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]:
            if game_state.board[0] == 'X' and game_state.board[1] == 'X' and game_state.board[2] == 'X':
                return 1    
            elif game_state.board[0] == 'O' and game_state.board[1] == 'O' and game_state.board[2] == 'O':
                return -1
         return 0
    

    I also modified it so the index isn't in every empty slot. View the full code at https://github.com/retep-mathwizard/pyai/blob/master/minimax_ttt