Search code examples
python · dictionary · python-3.8 · concurrent.futures

How to get concurrent.futures ThreadPoolExecutor to work with a dictionary as one of two parameters?


I am trying to use a ThreadPoolExecutor with a function that takes two parameters, one of which is a dictionary. My intention is to pass the whole dictionary to every call, but at the moment my multi_thread_load pairs the first player_ID with the first dictionary key, the second player_ID with the second dictionary key, and so on (iterating over a dictionary yields its keys).

Is there a way to pass the whole dictionary as a param for each player_ID?

My goal is to have one Player object per player_ID, each holding a player_dictionary with several attributes such as position, club, name, goals and games played. These are all collected in get_base_data; afterwards the current league position of the player's club is added to the player_dictionary.

One workaround would be to scrape the club's league position inside get_base_data, but then my code would scrape the league-table page once per player, and I want to avoid those unnecessary repeated requests.

file Player.py

class Player:
    """Hold the data of one player and attach the league rank of their club.

    Construction runs ``get_base_data`` followed by ``get_club_rank``, so a
    freshly created instance already has ``player_dictionary`` populated as
    far as those two steps allow.
    """

    def __init__(self, player_ID, ranks_dictionary):
        """Store the inputs and immediately load the player's data.

        Args:
            player_ID: Identifier of the player to load.
            ranks_dictionary: Mapping of club name to league rank. It is only
                read here, never modified.
        """
        self.player_ID = player_ID
        self.club_ranks = ranks_dictionary
        self.player_dictionary = {}
        self.get_base_data()
        self.get_club_rank()

    @property
    def dictionary(self):
        """Read-only alias of ``player_dictionary`` for callers using the short name."""
        return self.player_dictionary

    def get_base_data(self):
        # do something with self.player_ID
        # and fill player_dictionary (Name, Club, Position, ...)
        pass

    def get_club_rank(self):
        """Copy the club's league rank into ``player_dictionary["club_rank"]``.

        Best effort: when the player has no ``"club"`` entry, or the club is
        not part of ``club_ranks``, the dictionary is left unchanged.
        """
        try:
            rank = self.club_ranks[self.player_dictionary["club"]]
        except KeyError:
            # Unknown club or base data without a club: nothing to attach.
            return
        # Write to player_dictionary; the previous target attribute did not
        # exist, so the rank was silently dropped every time.
        self.player_dictionary["club_rank"] = rank

file Process.py

import Player
import concurrent.futures

class Process:
    """Load many players concurrently and collect their data dictionaries."""

    def __init__(self):
        # One dictionary per loaded player, appended as each worker finishes.
        self.dataset = []

    def create_player_class(self, player_ID, ranks_dictionary):
        """Build one Player and append its data dictionary to ``dataset``.

        Args:
            player_ID: Identifier of the player to load.
            ranks_dictionary: The complete club-name-to-rank mapping.
        """
        player = Player.Player(player_ID, ranks_dictionary)
        self.dataset.append(player.player_dictionary)

    def multi_thread_load(self, given_set, club_ranks):
        """Create a Player for every ID in ``given_set`` using a thread pool.

        Args:
            given_set: Iterable of player IDs.
            club_ranks: Mapping handed as a whole to every single call of
                ``create_player_class``.

        Raises:
            Exception: The first exception raised by a worker is re-raised
                here, after all submitted work has finished.
        """
        max_thread = 25  # upper bound on the number of worker threads
        player_IDs = list(given_set)
        if not player_IDs:
            # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
            return
        threads = min(max_thread, len(player_IDs))

        with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
            # submit() passes the same mapping to each call. Giving the
            # mapping to map() as a second iterable would instead pair each
            # ID with one of its keys.
            futures = [
                executor.submit(self.create_player_class, player_ID, club_ranks)
                for player_ID in player_IDs
            ]
            # Collect results so that worker exceptions are not lost.
            for future in futures:
                future.result()

execution file

import Process

# League rank per club name; handed to multi_thread_load below.
dictionary = {
    "FC Bayern München": "1",
    "Borussia Dortmund": "2",
    "RB Leipzig": "3",
    "Bayer 04 Leverkusen": "4",
    "Borussia M'gladbach": "5",
    "FC Augsburg": "6",
    "VfB Stuttgart": "7",
    "1. FC Union Berlin": "7",
    "SV Werder Bremen": "9",
    "Eintracht Frankfurt": "10",
    "VfL Wolfsburg": "11",
    "1899 Hoffenheim": "12",
    "SC Freiburg": "13",
    "Hertha BSC": "14",
    "Arminia Bielefeld": "15",
    "1. FC Köln": "16",
    "FC Schalke 04": "17",
    "1. FSV Mainz 05": "18",
}
player_ID_list = (32445, 31663, 31362, 32553)  # Dummy list; in reality the list has around 500 elements

# Lowercase instance name so the imported Process module is not rebound.
process = Process.Process()
club_ranks = dictionary

process.multi_thread_load(player_ID_list, club_ranks)

As you can probably see I don't have much experience with python/coding yet, so if you see any further mistakes I make or know a better solution, I would appreciate those comments as well.

Thanks!


Solution

  • The solution is to make the dictionary an instance variable of "Process" so all threads can access it without the need to pass it in multi_thread_load.

    import Player
    import concurrent.futures
    
    class Process:
        """Load many players concurrently, sharing one club-rank mapping."""

        def __init__(self):
            # One dictionary per loaded player, appended as each worker finishes.
            self.dataset = []
            # Shared by all worker threads; it is only read by them.
            self.club_table_dict = self.get_all_club_ranks()

        def create_player_class(self, player_ID):
            """Build one Player with the shared ranks and store its data."""
            player = Player.Player(player_ID, self.club_table_dict)
            self.dataset.append(player.player_dictionary)

        def get_all_club_ranks(self):
            """Return the mapping of club name to current league rank.

            TODO: fetch the current ranking here. Until that is implemented
            an empty mapping is returned so that constructing ``Process``
            does not fail on an undefined name.
            """
            return {}

        def multi_thread_load(self, given_set, club_ranks=None):
            """Create a Player for every ID in ``given_set`` using a thread pool.

            Args:
                given_set: Iterable of player IDs.
                club_ranks: Optional mapping of club name to rank. When given
                    it replaces ``club_table_dict`` before loading starts.

            Raises:
                Exception: The first exception raised by a worker is
                    re-raised here, after all submitted work has finished.
            """
            if club_ranks is not None:
                self.club_table_dict = club_ranks

            max_thread = 25  # upper bound on the number of worker threads
            player_IDs = list(given_set)
            if not player_IDs:
                # ThreadPoolExecutor rejects max_workers=0, and there is nothing to do.
                return
            threads = min(max_thread, len(player_IDs))

            with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
                # Consume the results so that worker exceptions are not lost.
                list(executor.map(self.create_player_class, player_IDs))