I try to create a ThreadPoolExecutor with two params where one of them is a dictionary.
My intention is to pass the whole dictionary as a param, but at the moment my multi_thread_load
loads the first player_ID with the first dictionary value and the second player_ID with the second dictionary value.
Is there a way to pass the whole dictionary as a param for each player_ID?
My goal is to have a class for each player_ID which contains a player_dictionary with several attributes like position, club, name, goals, games played etc. which will all be collected in get_base_data
and append the current league position of the club to the player_dictionary.
One work around would be to implement the scraping of the clubs league position to get_base_data
but then my code would scrape the website with the league position unnecessarily multiple times and I want to avoid that.
class Player:
def __init__(self, player_ID, ranks_dictionary):
self.player_ID = player_ID
self.club_ranks = ranks_dictionary
self.player_dictionary = {}
self.get_base_data()
self.get_club_rank()
def get_base_data(self):
# do something with self.player_ID
# and fill player_dictionary (Name, Club, Position, ...)
pass
def get_club_rank(self):
# get the national league rank from given dictionary
try:
rank = self.club_ranks[self.player_dictionary["club"]]
self.dictionary["club_rank"] = rank
except:
pass
import Player
import concurrent.futures
class Process:
def __init__(self):
self.dataset = []
def create_player_class(self, player_ID, ranks_dictionary):
player = Player.Player(player_ID, ranks_dictionary)
self.dataset.append(player.dictionary)
def multi_thread_load(self, given_set, club_ranks):
# creates multiple player classes at once & loads the data for those player
max_thread = 25 # a higher number increases processing speed
player_IDs = given_set
ranks_dictionary = club_ranks
threads = min(max_thread, len(player_IDs))
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
executor.map(self.create_player_class, player_IDs, ranks_dictionary)
import Process
dictionary = {
"FC Bayern München": "1",
"Borussia Dortmund": "2",
"RB Leipzig": "3",
"Bayer 04 Leverkusen": "4",
"Borussia M'gladbach": "5",
"FC Augsburg": "6",
"VfB Stuttgart": "7",
"1. FC Union Berlin": "7",
"SV Werder Bremen": "9",
"Eintracht Frankfurt": "10",
"VfL Wolfsburg": "11",
"1899 Hoffenheim": "12",
"SC Freiburg": "13",
"Hertha BSC": "14",
"Arminia Bielefeld": "15",
"1. FC Köln": "16",
"FC Schalke 04": "17",
"1. FSV Mainz 05": "18",
}
player_ID_list = (32445, 31663, 31362, 32553) # Dummy list, in reality the list as around 500 elements
Process = Process.Process()
club_ranks = dictionary
Process.multi_thread_load(player_ID_list, club_ranks)
As you can probably see I don't have much experience with python/coding yet, so if you see any further mistakes I make or know a better solution, I would appreciate those comments as well.
Thanks!
The solution is to make the dictionary an instance variable of "Process" so all threads can access it without the need to pass it in multi_thread_load
.
import Player
import concurrent.futures
class Process:
def __init__(self):
self.dataset = []
self.club_table_dict = self.get_all_club_ranks()
def create_player_class(self, player_ID):
player = Player.Player(player_ID, self.club_table_dict)
self.dataset.append(player.dictionary)
def get_all_club_ranks(self):
# gets the current ranking
return dictionary
def multi_thread_load(self, given_set, club_ranks):
# creates multiple player classes at once & loads the data for those player
max_thread = 25 # a higher number increases processing speed
player_IDs = given_set
ranks_dictionary = club_ranks
threads = min(max_thread, len(player_IDs))
with concurrent.futures.ThreadPoolExecutor(max_workers=threads) as executor:
executor.map(self.create_player_class, player_IDs)