Search code examples
algorithmminmax

minmax algorithm with tictactoe, wrong decision


I have this code written in Dart that implements the minmax algorithm, With a tic-tac-toe, the computer is playing as "O" and trying to maximize the score, but I get some wrong decision like this, it says (look at the top, the first is the move coordinate and the second is the score) that I have to move in (0,2). But this is not the right decision.

enter image description here

This is my code:

class Ai {
  play(state) {
    return max_Value(state);
  }


  // the action parameter holds the move that got as to this state
  List max_Value(List state, {action}) {
    List v = [null, double.negativeInfinity];

    if (terminal(state)) {
      return [action, utility(state)];
    }

    for (var action in actions(state)) {
      if (v[1] == 1) return v;
      v = max(v, min_Value(result(state, action), action: action));
    }
    return v;
  }

  List min_Value(List state, {action}) {
    List v = [null, double.infinity];

    if (terminal(state)) {
      return [action, utility(state)];
    }

    for (var action in actions(state)) {
      if (v[1] == -1) return v;
      v = min(v, max_Value(result(state, action), action: action));
    }
    return v;
  }

  List min(List prv, List curr) {
    return prv[1] < curr[1] ? prv : curr;
  }

  List max(List prv, List curr) {
    return prv[1] > curr[1] ? prv : curr;
  }

  bool terminal(List state) {
    if (state[0][0] == state[0][1] &&
        state[0][0] == state[0][2] &&
        state[0][0] != "") {
      return true;
    }
    if (state[1][0] == state[1][1] &&
        state[1][0] == state[1][2] &&
        state[1][0] != "") {
      return true;
    }
    if (state[2][0] == state[2][1] &&
        state[2][0] == state[2][2] &&
        state[2][0] != "") {
      return true;
    }

    // vertical
    if (state[0][0] == state[1][0] &&
        state[0][0] == state[2][0] &&
        state[0][0] != "") {
      return true;
    }
    if (state[0][1] == state[1][1] &&
        state[0][1] == state[2][1] &&
        state[0][1] != "") {
      return true;
    }
    if (state[0][2] == state[1][2] &&
        state[0][2] == state[2][2] &&
        state[0][2] != "") {
      return true;
    }

    // diagonal
    if (state[0][0] == state[1][1] &&
        state[2][2] == state[0][0] &&
        state[0][0] != "") {
      return true;
    }

    if (state[2][0] == state[1][1] &&
        state[2][0] == state[0][2] &&
        state[2][0] != "") {
      return true;
    }

    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (state[i][j] == "") {
          return false;
        }
      }
    }
    return true;
  }

  double utility(List state) {
    String winner = "";
    // horizontal
    if (state[0][0] == state[0][1] && state[0][0] == state[0][2]) {
      winner = state[0][0];
    }
    if (state[1][0] == state[1][1] && state[1][0] == state[1][2]) {
      winner = state[1][0];
    }
    if (state[2][0] == state[2][1] && state[2][0] == state[2][2]) {
      winner = state[2][0];
    }

    // vertical
    if (state[0][0] == state[1][0] && state[0][0] == state[2][0]) {
      winner = state[0][0];
    }
    if (state[0][1] == state[1][1] && state[0][1] == state[2][1]) {
      winner = state[0][1];
    }
    if (state[0][2] == state[1][2] && state[0][2] == state[2][2]) {
      winner = state[0][2];
    }

    // diagonal
    if (state[0][0] == state[1][1] && state[2][2] == state[0][0]) {
      winner = state[0][0];
    }

    if (state[2][0] == state[1][1] && state[2][0] == state[0][2]) {
      winner = state[2][0];
    }

    if (winner == "O") {
      return 1;
    } else if (winner == "X") {
      return -1;
    } else {
      return 0;
    }
  }

  List actions(List state) {
    List acs = [];
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (state[i][j] == "") {
          acs.add([i, j]);
        }
      }
    }
    return acs;
  }

  List result(List state, List action) {
    List newState = [
      ["", "", ""],
      ["", "", ""],
      ["", "", ""]
    ];
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        newState[i][j] = state[i][j];
      }
    }
    newState[action[0]][action[1]] = player(state);
    return newState;
  }

  player(List state) {
    int xnum = 0;
    int onum = 0;
    for (int i = 0; i < 3; i++) {
      for (int j = 0; j < 3; j++) {
        if (state[i][j] == "X") {
          xnum++;
        } else if (state[i][j] == "O") {
          onum++;
        }
      }
    }
    if (xnum > onum) {
      return "O";
    } else {
      return "X";
    }
  }
}

Solution

  • Some mistakes:

    • This needs to depend on the player to move whether you return max or min:

      play(state) {
          return max_Value(state);
      }
      
    • Here (and same for min) v might become an action done later and not the actual action that was performed and lead to the better value.

        v = max(v, min_Value(result(state, action), action: action));
      

    Possible improvements:

    • With a little modification terminal could return the evaluation too and then you don't need the utility function.

    • You constantly evaluate player with the function. You should have to do that at most once (see first mistake). min and max know whether they are playing X or O. Everything could be simplified a lot and the number of functions reduced without increasing their complexity.