Search code examples
pythongenetic-algorithmroulette-wheel-selection

How to avoid selecting parents 'twice' using Roulette Wheel Selection?


I am working on a genetic algorithm in Python, where I want to use the roulette wheel selection for selecting parents. However, I came to the conclusion that with my current code, it is possible that certain parents are selected multiple times, however I want to avoid this.

Here is the first part of my code: The part where I am struggling is in ' roulette wheel selection'''.

import numpy as np
import random
import time
import copy

'''Initialisation settings'''

num_jobs = 20  # number of jobs

proc_time = [10, 10, 13, 4, 9, 4, 8, 15, 7, 1, 9, 3, 15, 9, 11, 6, 5, 14, 18, 3]
due_dates = [12, 40, 50, 16, 20, 105, 73, 45, 6, 64, 15, 6, 92, 43, 78, 21, 15, 50, 150, 99]

# inputs
population_size = int(10)  # size of the population
crossover_rate = float(0.8)
mutation_rate = float(0.2)
mutation_selection_rate = float(0.5)
num_mutation_jobs = round(num_jobs * mutation_selection_rate)
num_iteration = int(2000)  # amount of iterations for the GA

start_time = time.time()

'''----- Generate the initial population -----'''
Tbest = 999999999999999
best_list, best_obj = [], []
population_list = []
for i in range(population_size):
    random_num = list(np.random.permutation(num_jobs))  # generate a random permutation of 0 to num_jobs
    population_list.append(random_num)  # add to the population_list
#print(population_list)

''' Fitness value of the initial population'''
total_chromosome = copy.deepcopy(population_list)  #initial population
chrom_fitness, chrom_fit = [], []
total_fitness = 0
num_tardy=0
for i in range(population_size): # solutions (chromosomes)
    ptime = 0
    tardiness = 0
    for j in range(num_jobs): # genes in the chromosome
        ptime = ptime + proc_time[total_chromosome[i][j]] # proc time is sum of the processing times of the genes, in the order that the genes appear in the chromosome
        tardiness = tardiness + max(ptime - due_dates[total_chromosome[i][j]], 0) # calc tardiness of each gene (job) in a chromosome (sequence/solution)
        if ptime >= due_dates[total_chromosome[i][j]]: # if due date is exceeded, the job is tardy
            num_tardy = num_tardy + 1
    chrom_fitness.append(num_tardy)
    chrom_fit.append(num_tardy)
    total_fitness = total_fitness + chrom_fitness[i] # total sum of the fitness values of the chromosomes
    num_tardy=0
    #print('chrom_fitness')
    #print(chrom_fitness)

'''Rank the solutions best to worst'''
chrom_fitness_rank = copy.deepcopy(chrom_fitness)
chrom_fitness_rank = np.array(chrom_fitness_rank)
#print(chrom_fitness_rank)

combined = zip(chrom_fitness_rank, population_list)
zip_sort = sorted(combined, key=lambda x: x[0])
chrom_fitness_rank, population_list = map(list,zip(*zip_sort))
#print(chrom_fitness_rank)
#print(population_list)

'''Do the required amount of iterations'''
for n in range(num_iteration):
    Tbest_now = 99999999999
    '''----------Roulette wheel selection----------'''
    parent_list = copy.deepcopy(population_list)
    pk, qk = [], []
    for i in range(population_size):
        pk.append(chrom_fitness[i] / total_fitness) #chrom_fitness/total_fitness for each solution/sequence, relative fitness
        cum_prob = [sum(pk[:i + 1]) for i in range(len(pk))] # get cumulative probabilities

    parent_number = population_size
    chosen = []
    for n in range(parent_number):
        r=random.random()
        for (i, individual) in enumerate(population_list):
            if cum_prob[i]>=r:
                chosen.append(list(individual))
                break
        #print(r)
    print('choose')
    print(chosen)

I thought about setting the fitness value of the chosen individual to a very high value (999999) (in my case a lower fitness value is 'better') so that there is a very low chance that this individual is selected again. However, I am not sure how to do this.


Solution

  • Just keep track which individuals were already selected by putting their indices into a set (already_selected). Then select only when cum_prob[i]>=r and i not in already_selected.

    parent_number = population_size
    chosen = []
    already_selected = set()
    for n in range(parent_number):
        r=random.random()
        for (i, individual) in enumerate(population_list):
            if cum_prob[i]>=r and i not in already_selected:
                chosen.append(list(individual))
                already_selected.add(i)
                break