Search code examples
pythonsortingfor-loopglobal-variablesnested-loops

My python dictionary is not updating properly


I am working on a project where I want to capture some data from different types of sorting algorithms and I planned on doing this by saving all the data into a dictionary and then converting that dictionary into a pandas data frame and eventually exporting it as a csv file.

The problem now is that I am updating my dictionary inside 4 for loops but it seems that for some reason the updating is being overwritten somewhere in my code at first I thought it was the global keyword that I was using to keep track of data comparison and data swap count but I am not sure I tried my best to look for moments that interfere with my global variable but I can't find anything can you please help?

My code looks something like this:


def merge_sort(array):
    #apparently merge sort only has data assignment without no swaps and will create a new array so we can exclude data swap count
    if len(array) <= 1:
        return
    mid = len(array) // 2
    left = array[:mid].copy()
    right = array[mid:].copy()
    merge_sort(left)
    merge_sort(right)

    x,y=merge(array, left, right)
    global comparison_counter,swap_counter
    comparison_counter+=x
    swap_counter+=y

    return comparison_counter,swap_counter
def merge(array, lefthalf, righthalf):
    i=0
    j=0
    k=0
    global data_comparison_count, data_swap_count
    while i < len(lefthalf) and j < len(righthalf):
        #underneath is comparison
        if lefthalf[i] < righthalf[j]:#data comparison
            array[k]=lefthalf[i]
            i=i+1
        else:
            array[k]=righthalf[j]
            j=j+1
        data_comparison_count+=1
        k=k+1
    while i < len(lefthalf):

        array[k]=lefthalf[i]
        i=i+1
        k=k+1
    while j < len(righthalf):

        array[k]=righthalf[j]
        j=j+1
        k=k+1
    return data_comparison_count,data_swap_count



def partition(array, start_index, end_index):
    global data_comparison_count, data_swap_count
    pivot_value = array[start_index]
    left_mark = start_index + 1
    right_mark = end_index
    while True:
        while left_mark <= right_mark and array[left_mark] <= pivot_value:#comparison
            data_comparison_count+=1
            left_mark = left_mark + 1
        while array[right_mark] >= pivot_value and right_mark >= left_mark:#comparison
            data_comparison_count+=1
            right_mark = right_mark - 1
        if right_mark < left_mark:
            break
        else:
            #data_swap=1

            temp = array[left_mark]
            array[left_mark] = array[right_mark]
            array[right_mark] = temp
            data_swap_count += 1
    #data_swap
    data_swap_count+=1
    array[start_index] = array[right_mark]
    array[right_mark] = pivot_value
    return right_mark,data_comparison_count,data_swap_count


def quick_sort(array):
    temp1,temp2=quick_sort_helper(array, 0, len(array) - 1)
    return temp1,temp2

def quick_sort_helper(array, start_index, end_index):
    global comparison_counter, swap_counter
    if start_index < end_index:
        split_point,x,y = partition(array, start_index, end_index)
        comparison_counter+=x
        swap_counter+=y
        quick_sort_helper(array, start_index, split_point - 1)
        quick_sort_helper(array, split_point + 1, end_index)

    return comparison_counter,swap_counter

time_and_data_dictionary={"Time":12,"Data Comparisons":12,"Data Swaps":12}
selection_sort_information={"selection sort":time_and_data_dictionary}
bubble_sort_information={"bubble sort":time_and_data_dictionary}
insertion_sort_information={"insertion sort":time_and_data_dictionary}
shell_sort_information={"shell sort":time_and_data_dictionary}
quick_sort_information={"quick sort":time_and_data_dictionary}
merge_sort_information={"merge sort":time_and_data_dictionary}
array_of_sorting_algorithms=[selection_sort_information,bubble_sort_information,insertion_sort_information,shell_sort_information,quick_sort_information,merge_sort_information]

dictionary={"Ascending_Sorted_250":array_of_sorting_algorithms,"Ascending_Sorted_500":array_of_sorting_algorithms,"Ascending_Sorted_1000": array_of_sorting_algorithms,"Ascending_Sorted_10000":array_of_sorting_algorithms,"Descending_Sorted_250":array_of_sorting_algorithms,"Descending_Sorted_500":array_of_sorting_algorithms," Descending_Sorted_1000": array_of_sorting_algorithms,"Descending_Sorted_10000":array_of_sorting_algorithms,"Random_Sorted_250":array_of_sorting_algorithms,"Random_Sorted_500":array_of_sorting_algorithms," Random_Sorted_1000":array_of_sorting_algorithms,"Random_Sorted_10000":array_of_sorting_algorithms}


t="Time"
dc="Data Comparisons"
ds="Data Swaps"
start = time()
tuple_selection_sort_ad250 = selection_sort_array(ascending_data_250)
end = time()
td_selection_sort_ad250 = end - start

start = time()
tuple_bubble_sort_ad250 = bubble_sort_array(ascending_data_250)
end = time()
td_bubble_sort_ad250 = end - start

start = time()
tuple_insertion_sort_ad250 = insertion_sort_array(ascending_data_250)
end = time()
td_insertion_sort_ad250 = end - start

start = time()
tuple_shell_sort_ad250 = shell_sort_array(ascending_data_250)
end = time()
td_shell_sort_ad250 = end - start

data_comparison_count = 0
data_swap_count = 0
comparison_counter = 0
swap_counter = 0
start = time()
tuple_merge_sort_ad250 = merge_sort(ascending_data_250)
end = time()
td_merge_sort_ad250 = end - start

data_comparison_count = 0
data_swap_count = 0
comparison_counter = 0
swap_counter = 0
start = time()
tuple_quick_sort_ad250 = quick_sort(ascending_data_250)
end = time()
td_quick_sort_ad250 = end - start

start = time()
tuple_selection_sort_ad500 = selection_sort_array(ascending_data_500)
end = time()
td_selection_sort_ad500 = end - start
start = time()
tuple_bubble_sort_ad500 = bubble_sort_array(ascending_data_500)
end = time()
td_bubble_sort_ad500 = end - start
start = time()
tuple_insertion_sort_ad500 = insertion_sort_array(ascending_data_500)
end = time()
td_insertion_sort_ad500 = end - start
start = time()
tuple_shell_sort_ad500 = shell_sort_array(ascending_data_500)
end = time()
td_shell_sort_ad500 = end - start

data_comparison_count = 0
data_swap_count = 0
comparison_counter = 0
swap_counter = 0
start = time()
tuple_merge_sort_ad500 = merge_sort(ascending_data_500)
end = time()
td_merge_sort_ad500 = end - start

data_comparison_count = 0
data_swap_count = 0
comparison_counter = 0
swap_counter = 0
start = time()
tuple_quick_sort_ad500 = quick_sort(ascending_data_500)
end = time()
td_quick_sort_ad500 = end - start
start = time()
tuple_selection_sort_dd250 = selection_sort_array(descending_data_250)
end = time()
td_selection_sort_dd250 = end - start
start = time()
tuple_bubble_sort_dd250 = bubble_sort_array(descending_data_250)
end = time()
td_bubble_sort_dd250 = end - start
start = time()
tuple_insertion_sort_dd250 = insertion_sort_array(descending_data_250)
end = time()
td_insertion_sort_dd250 = end - start
start = time()
tuple_shell_sort_dd250 = shell_sort_array(descending_data_250)
end = time()
td_shell_sort_dd250 = end - start
for i,j in dictionary.items():
    for x,val in enumerate(j):
        for k,v in val.items():
            for y,z in v.items():


                if i=="Ascending_Sorted_250":
                    if x==0:


                            dictionary[i][x][k][t]=td_selection_sort_ad250
                            print(td_selection_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                            dictionary[i][x][k][dc]=tuple_selection_sort_ad250[0]
                            print(tuple_selection_sort_ad250[0], "???????????????????????")
                            dictionary[i][x][k][ds]=tuple_selection_sort_ad250[1]
                            print(tuple_selection_sort_ad250[1], "////////////////////")
                    if x==1:
                            print("is",k,x)
                            print(i, x, k, t, dc, ds, type(i), type(x), type(k), type(t), type(dc), type(ds))
                            dictionary[i][x][k][t] = td_bubble_sort_ad250
                            print(td_bubble_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                            dictionary[i][x][k][dc] = tuple_bubble_sort_ad250[0]

                            dictionary[i][x][k][ds] = tuple_bubble_sort_ad250[1]

                    if x==2:
                        print("going",k,x)
                        print(i, x, k, t, dc, ds, type(i), type(x), type(k), type(t), type(dc), type(ds))
                        dictionary[i][x][k][t] = td_insertion_sort_ad250
                        dictionary[i][x][k][dc] = tuple_insertion_sort_ad250[0]
                        dictionary[i][x][k][ds] = tuple_insertion_sort_ad250[1]
                    if x==3:
                        print("on",k,x)
                        print(i, x, k, t, dc, ds, type(i), type(x), type(k), type(t), type(dc), type(ds))
                        dictionary[i][x][k][t] = td_shell_sort_ad250
                        dictionary[i][x][k][dc] = tuple_shell_sort_ad250[0]
                        dictionary[i][x][k][ds] = tuple_shell_sort_ad250[1]
                    if x==4:
                        print("here",k,x)
                        print(i, x, k, t, dc, ds, type(i), type(x), type(k), type(t), type(dc), type(ds))
                        dictionary[i][x][k][t] = td_merge_sort_ad250
                        dictionary[i][x][k][dc] = tuple_merge_sort_ad250[0]
                        dictionary[i][x][k][ds] = tuple_merge_sort_ad250[1]
                    if x==5:
                        print("now",k,x)
                        print(i, x, k, t, dc, ds, type(i), type(x), type(k), type(t), type(dc), type(ds))
                        dictionary[i][x][k][t] = td_quick_sort_ad250
                        dictionary[i][x][k][dc] = tuple_quick_sort_ad250[0]
                        dictionary[i][x][k][ds] = tuple_quick_sort_ad250[1]
                if i=="Ascending_Sorted_500":
                    if x == 0:
                        dictionary[i][x][k][t] = td_selection_sort_ad500
                        print(td_selection_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_selection_sort_ad500[0]
                        print(tuple_selection_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_selection_sort_ad500[1]
                        print(tuple_selection_sort_ad250[1], "////////////////////")
                    if x == 1:
                        dictionary[i][x][k][t] = td_bubble_sort_ad500
                        print(td_bubble_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_bubble_sort_ad500[0]
                        print(tuple_bubble_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_bubble_sort_ad500[1]
                        print(tuple_bubble_sort_ad250[1], "////////////////////")
                    if x == 2:
                        dictionary[i][x][k][t] = td_insertion_sort_ad500
                        print(td_insertion_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_insertion_sort_ad500[0]
                        print(tuple_insertion_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_insertion_sort_ad500[1]
                        print(tuple_insertion_sort_ad250[1], "////////////////////")
                    if x == 3:
                        dictionary[i][x][k][t] = td_shell_sort_ad500
                        print(td_shell_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_shell_sort_ad500[0]
                        print(tuple_shell_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_shell_sort_ad500[1]
                        print(tuple_shell_sort_ad250[1], "////////////////////")
                    if x == 4:
                        dictionary[i][x][k][t] = td_merge_sort_ad500
                        print(td_merge_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_merge_sort_ad500[0]
                        print(tuple_merge_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_merge_sort_ad500[1]
                        print(tuple_merge_sort_ad250[1], "////////////////////")
                    if x == 5:
                        dictionary[i][x][k][t] = td_quick_sort_ad500
                        print(td_quick_sort_ad250,"!!!!!!!!!!!!!!!!!!!!!!")
                        dictionary[i][x][k][dc] = tuple_quick_sort_ad500[0]
                        print(tuple_quick_sort_ad250[0], "???????????????????????")
                        dictionary[i][x][k][ds] = tuple_quick_sort_ad500[1]
                        print(tuple_quick_sort_ad250[1], "////////////////////")


print(dictionary,"$$$$$$$$$$$$$$")

I tried initializing the variables every time I called the merge_sort and quick_sort functions, since I thought it was a problem with the globalization of my variables, I thought this would fix it, but this was far from the truth. I also tried to debug it using different statements but the output of my debug and the output for my dictionary was very different. Here is a snippet of what my console(output) looks like:

Console(output)

Console(output)2nd slide


Solution

  • The main thing I would look at would be to implement these instrumented sorting methods inside an enclosure so that you can track each incremented counter. There is still a kind of global version of these counters, but they are contained.

    Here is an example of one sort method with a couple of data sets. I will leave the proper counter incrementing to you but I kept the couple you had in already.

    from time import time
    
    def build_merge_sorter():
        ## ---------------------
        ## keeping tidy track of operations
        ## ---------------------
        data_comparison_count = 0
        data_swap_count = 0
        comparison_counter = 0
        swap_counter = 0
        ## ---------------------
    
        def helper(array, lefthalf, righthalf):
            ## ---------------------
            ## Let this method know that these variables come from someplace else
            ## ---------------------
            nonlocal data_comparison_count
            nonlocal data_swap_count
            nonlocal comparison_counter
            nonlocal swap_counter
            ## ---------------------
    
            i=0
            j=0
            k=0
    
            while i < len(lefthalf) and j < len(righthalf):
    
                ## ---------------------
                ## Is this +1 or +3?
                ## ---------------------
                data_comparison_count += 1
                ## ---------------------
    
                if lefthalf[i] < righthalf[j]:
                    array[k]=lefthalf[i]
                    i += 1
                else:
                    array[k]=righthalf[j]
                    j += 1
    
                k += 1
    
            while i < len(lefthalf):
                array[k] = lefthalf[i]
                i += 1
                k += 1
    
            while j < len(righthalf):
                array[k] = righthalf[j]
                j += 1
                k += 1
    
        def sorter(array):
            ## ---------------------
            ## Let this method know that these variables come from someplace else
            ## ---------------------
            nonlocal data_comparison_count
            nonlocal data_swap_count
            nonlocal comparison_counter
            nonlocal swap_counter
            ## ---------------------
    
            ## ---------------------
            ## initialize the counters for this sort
            ## ---------------------
            data_comparison_count = 0
            data_swap_count = 0
            comparison_counter = 0
            swap_counter = 0
            ## ---------------------
    
            ## ---------------------
            ## an array of 1 element is already sorted
            ## ---------------------
            comparison_counter += 1
            if len(array) <= 1:
                return (
                    array,
                    comparison_counter,
                    swap_counter,
                    data_comparison_count,
                    data_swap_count
                )
            ## ---------------------
    
            ## ---------------------
            ## Should these opperations have costs?
            ## ---------------------
            mid = len(array) // 2
            left = array[:mid]
            right = array[mid:]
            ## ---------------------
    
            ## ---------------------
            ## apply the sort to each half in-place so we can discard the result
            ## ---------------------
            sorter(left)
            sorter(right)
            ## ---------------------
    
            ## ---------------------
            ## Apply the help to shuffle data points about
            ## ---------------------
            helper(array, left, right)
            ## ---------------------
    
            return (
                array,
                comparison_counter,
                swap_counter,
                data_comparison_count,
                data_swap_count
            )
    
        return sorter
    
    SORTERS = {
        "Merge Sort": build_merge_sorter()
    }
    
    DATASETS = {
        "Ascending 250": list(range(250)),
        "Decending 250": list(reversed(range(250)))
    }
    
    ## ---------------------
    ## Run tests
    ## ---------------------
    test_results = {}
    for method_name, method in SORTERS.items():
        for dataset_name, dataset in DATASETS.items():
            dataset = dataset[:]
    
            start = time()
            (
                sorted_dataset,
                comparison_counter,
                swap_counter,
                data_comparison_count,
                data_swap_count
            ) = method(dataset)
            end = time()
    
            test_results.setdefault(method_name, {})[dataset_name] = {
                "time": end - start,
                "comparisons": comparison_counter,
                "swaps": swap_counter,
                "data_comparisons": data_comparison_count,
                "data_swaps": data_swap_count,
            }
    ## ---------------------
    
    import json
    print(json.dumps(test_results, indent=4))
    

    That is going to result in something like:

    {
        "Merge Sort": {
            "Ascending 250": {
                "time": 0.000995635986328125,
                "comparisons": 1,
                "swaps": 0,
                "data_comparisons": 249,
                "data_swaps": 0
            },
            "Decending 250": {
                "time": 0.0,
                "comparisons": 1,
                "swaps": 0,
                "data_comparisons": 251,
                "data_swaps": 0
            }
        }
    }