Prisoner's dilemma strange results

I tried to implement the prisoner's dilemma in Python, but instead of showing that tit for tat is the better strategy, my results show that defecting scores higher.

Can someone look at my code, and tell me what I have done wrong here?

import random
from colorama import Fore, Style
import numpy as np
# The two moves a player can make in a round. Strategy functions return one
# of these values, and pairs of them form the keys of the payoff matrix.
COOPERATE = 'cooperate'
DEFECT = 'defect'

# Define the strategies
def always_cooperate(history):
    """Cooperate in every round; the match history is ignored."""
    return COOPERATE

def always_defect(history):
    """Defect in every round; the match history is ignored."""
    return DEFECT

def random_choice_cooperate(history):
    """Choose a move at random, cooperating with probability 0.75.

    The match history is ignored.
    """
    if random.random() < 0.75:
        return COOPERATE
    return DEFECT

def random_choice_defect(history):
    """Choose a move at random, cooperating with probability 0.25.

    The match history is ignored.
    """
    if random.random() < 0.25:
        return COOPERATE
    return DEFECT

def random_choice_neutral(history):
    """Choose a move at random, cooperating with probability 0.5.

    The match history is ignored.
    """
    if random.random() < 0.5:
        return COOPERATE
    return DEFECT

def tit_for_tat(history):
    """Cooperate first, then copy the opponent's previous move.

    `history` holds one (own move, opponent's move) tuple per round played.
    """
    if history:
        # Index 1 of the most recent round is what the opponent played.
        return history[-1][1]
    return COOPERATE

def tat_for_tit(history):
    """Defect first, then copy the opponent's previous move.

    `history` holds one (own move, opponent's move) tuple per round played.
    """
    if history:
        # Index 1 of the most recent round is what the opponent played.
        return history[-1][1]
    return DEFECT

def tit_for_two_tats(history):
    """Defect only after the opponent has defected twice in a row.

    `history` holds one (own move, opponent's move) tuple per round played.
    During the first two rounds there is not enough history, so the
    strategy cooperates.
    """
    if len(history) >= 2:
        earlier_round, latest_round = history[-2:]
        if earlier_round[1] == DEFECT and latest_round[1] == DEFECT:
            return DEFECT
    return COOPERATE

# Payoff matrix: maps (first player's move, second player's move) to
# (first player's score, second player's score) for a single round.
payoff_matrix = {
    (COOPERATE, COOPERATE): (3, 3),  # both cooperate
    (COOPERATE, DEFECT): (0, 5),  # first cooperates, second defects
    (DEFECT, COOPERATE): (5, 0),  # first defects, second cooperates
    (DEFECT, DEFECT): (1, 1)  # both defect
}


# Strategies entered in the tournament. Each entry is a function that takes
# the match history and returns a move.
players = [always_cooperate, always_defect, random_choice_defect, tit_for_tat, tit_for_two_tats, random_choice_cooperate, tat_for_tit, random_choice_neutral]

# Terminal color used when printing each strategy's result, keyed by the
# strategy function's name.
player_colors = {
    'always_cooperate': Fore.GREEN,
    'always_defect': Fore.RED,
    'tit_for_tat': Fore.BLUE,
    'random_choice_cooperate': Fore.MAGENTA,
    'random_choice_defect': Fore.LIGHTRED_EX,
    'tat_for_tit': Fore.LIGHTYELLOW_EX,
    'random_choice_neutral': Fore.WHITE,
    'tit_for_two_tats': Fore.LIGHTBLACK_EX,
}


def _play_match(player1, player2, rounds):
    """Play one iterated match and return (player1 total, player2 total).

    Each player is called once per round with its own history: a list of
    (own move, opponent's move) tuples for the rounds played so far.
    """
    history1 = []
    history2 = []
    score1 = 0
    score2 = 0
    for _ in range(rounds):
        move1 = player1(history1)
        move2 = player2(history2)
        gain1, gain2 = payoff_matrix[(move1, move2)]
        score1 += gain1
        score2 += gain2
        history1.append((move1, move2))
        history2.append((move2, move1))
    return score1, score2


def tournament(players, rounds=100, *, include_self_play=False):
    """Run a round-robin tournament and return the ranked total scores.

    Every strategy plays one match of `rounds` rounds against every other
    strategy in `players`.

    Args:
        players: Sequence of strategy functions. Each takes its match
            history and returns a move. Totals are keyed by the function's
            `__name__`, so strategies sharing a name share one total.
        rounds: Number of rounds played in each match.
        include_self_play: When true, each strategy additionally plays one
            match against a second copy of itself. Only one side's score
            is added to its total, so the self-match counts once, like any
            other match the strategy takes part in.

    Returns:
        A list of (strategy name, total score) tuples sorted by total
        score, highest first.
    """
    total_scores = {player.__name__: 0 for player in players}
    for index, player1 in enumerate(players):
        if include_self_play:
            # Both sides are the same strategy; crediting both scores would
            # count this match twice for that strategy.
            own_score, _ = _play_match(player1, player1, rounds)
            total_scores[player1.__name__] += own_score
        for player2 in players[index + 1:]:
            score1, score2 = _play_match(player1, player2, rounds)
            total_scores[player1.__name__] += score1
            total_scores[player2.__name__] += score2
    return sorted(total_scores.items(), key=lambda item: item[1], reverse=True)

# Run the tournament
# for player, score in tournament(players):
#     print(f'\nFinal score: {player}: {score}')

num_tournaments = 1000
results = {player.__name__: [] for player in players}

# Run the full tournament repeatedly and record each strategy's total score
# from every run.
for _ in range(num_tournaments):
    for player, score in tournament(players):
        results[player].append(score)

# Median total score per strategy, ordered from lowest to highest.
medians = [(player, np.median(scores)) for player, scores in results.items()]
sorted_medians = sorted(medians, key=lambda x: x[1])

# Print each strategy in its assigned color. The style is reset at the end of
# every line so the color does not bleed into later terminal output.
for player, median_score in sorted_medians:
    player_color = player_colors.get(player, Fore.RESET)
    print(f'{player_color}{player}: {median_score} coins{Style.RESET_ALL}')

The code plays every match for 100 rounds. It then repeats the whole tournament 1000 times to get the median score over many iterations.

Here is the output:

always_cooperate: 1347.024 coins
random_choice_cooperate: 1535.651 coins
tit_for_two_tats: 1561.442 coins
tit_for_tat: 1609.444 coins
tat_for_tit: 1619.43 coins
random_choice_neutral: 1663.855 coins
always_defect: 1711.764 coins
random_choice_defect: 1726.992 coins

In the latest Veritasium video the dilemma is presented with the reward matrix, but Tit for Tat is presented as the most efficient solution. I cannot replicate that result, and thus I'm opening this question.

Solution

I think the problem lies in the setup of your tournament. It is set up in such a way that always_defect never has to play against always_defect. So a player of any type never plays against a player of the same type.

It seems to be an advantage to be the only always_defect in the group.

Replacing the first snippet below with the second (the inner loop now starts at `i` instead of `i+1`)

for i in range(len(players)):
    for j in range(i+1, len(players)):

for i in range(len(players)):
    for j in range(i, len(players)):

makes it so that an always_defect also has to play against an always_defect, which changes the picture.

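However, I am not 100% sure that the accounting is done correctly for the case that a player type plays against a player of the same type. In fact, with this change both `score1` and `score2` of a self-match are added to the same `total_scores` entry, so that match is counted twice for the player; to count it once, add only one side's score when `i == j`.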