Search code examples
python function return definition

How to return data from one definition to another one?


I am experimenting with Python and trying to write a simple data-cleaning program. I want to pass the `title` values from the `read_excel` function to the `output` function, but Python keeps reporting that the name `title` is not defined. Here is my code:

import os
import pandas as pd
import math

class Item():
    """A single record made of a name, a cost, a gender and a prime flag.

    The four values live in name-mangled private attributes. Equality and
    hashing are both derived from those four values, so instances can be
    stored in sets and used as dictionary keys.
    """

    # Class-level defaults for the private fields.
    __name = ""
    __cost = 0
    __gender = ""
    __prime = ""

    def __init__(self, name, cost, gender, prime):
        """Store the four field values on the instance."""
        self.__name = name
        self.__cost = cost
        self.__gender = gender
        self.__prime = prime

    def has_all_properties(self):
        """Return True when name, gender and prime are truthy and cost is not NaN."""
        if not self.__name:
            return False
        if math.isnan(self.__cost):
            return False
        return bool(self.__gender and self.__prime)

    def clean(self, wanted_cost, wanted_gender, wanted_prime):
        """Return True when the item matches the wanted gender and prime flag
        and its cost does not exceed `wanted_cost` (the name must be truthy)."""
        if not self.__name:
            return False
        if not (self.__gender == wanted_gender):
            return False
        return bool(
            self.__cost <= wanted_cost
            and self.__prime == wanted_prime
        )

    def __eq__(self, other):
        """Field-by-field comparison against another item's private fields."""
        return (
            self.__name == other.__name
            and self.__cost == other.__cost
            and self.__gender == other.__gender
            and self.__prime == other.__prime
        )

    def __hash__(self):
        """Hash of the four field values, consistent with __eq__."""
        fields = (self.__name, self.__cost, self.__gender, self.__prime)
        return hash(fields)

    def __repr__(self):
        """Compact text form: Item(name,cost,gender,prime)."""
        return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

    def tuple(self):
        """Return the fields as a (name, cost, gender, prime) tuple."""
        return self.__name, self.__cost, self.__gender, self.__prime

def read_excel(filetype):
    """Read every file in the current directory whose name ends with `filetype`.

    The matching files are read with pandas, stacked into one DataFrame and
    reduced to the columns 'name', 'cost', 'used_by' and 'prime'. The column
    names and the rows are printed.

    Returns the rows as a list of lists (one inner list per row).
    """
    cwd = os.path.abspath('')  # absolute path of the current working directory
    files = os.listdir(cwd)  
    df = pd.DataFrame()
    for file in files:
        if file.endswith(filetype):
            df = df.append(pd.read_excel(file), ignore_index=True)
            # Ask pandas to substitute None wherever a cell is null.
            df = df.where(df.notnull(), None)
            df = df[['name', 'cost', 'used_by', 'prime']]
    # Column names; this variable is local to read_excel().
    title = list(df.columns.values)
    print(title) 
    array = df.values.tolist()
    print(array)
    return array
    # NOTE(review): the statement below is never executed because the function
    # has already returned on the line above; output() is also declared with
    # three parameters, not one.
    return output(title)

def process(array):
    """Build Item objects from the rows, filter them, and print the survivors.

    NOTE(review): there is no return statement, so callers receive None.
    """
    # Set comprehension: rows that produce equal Items collapse into one.
    mylist = {Item(*k) for k in array}
    print(mylist)
    # Keep only items for which has_all_properties() is true.
    filtered = {obj for obj in mylist if obj.has_all_properties()}
    # Keep items accepted by Item.clean() for cost 20, gender "male", prime "yes".
    clean = {obj for obj in filtered if obj.clean(20,"male","yes")}
    result = list(clean)
    print(result)
    
def output(where, sort_data, title):
    """Write the items in `sort_data` to an Excel file.

    where: destination passed to DataFrame.to_excel.
    sort_data: iterable of objects exposing a tuple() method.
    title: column names for the written sheet.
    """
    rows = [entry.tuple() for entry in sort_data]
    frame = pd.DataFrame(rows, columns=title)
    frame.to_excel(where, index=False, header=True)

if __name__ == "__main__":
    # Script entry point: read the '.XLSX' files, filter the rows, write the result.
    inputfile = read_excel('.XLSX')
    processdata = process(inputfile)
    # NOTE(review): `title` is never assigned at module level in this script;
    # it exists only as a local variable inside read_excel().
    result = output('clean_data.xlsx', processdata, title)

Can you show me what to do instead? Thank you for the help.


Solution

  • I found what I think is one of the easiest-to-understand ways of solving my issue: I broke the read_excel function apart so that it only returns the DataFrame, and added separate get_header and get_list functions. Here is my solution:

    import os
    import pandas as pd
    import math
    
    class Item():
        """A single record made of a name, a cost, a gender and a prime flag.

        The four values live in name-mangled private attributes. Equality and
        hashing are both derived from those four values, so instances can be
        stored in sets and used as dictionary keys.
        """

        # Class-level defaults for the private fields.
        __name = ""
        __cost = 0
        __gender = ""
        __prime = ""

        def __init__(self, name, cost, gender, prime):
            """Store the four field values on the instance."""
            self.__name = name
            self.__cost = cost
            self.__gender = gender
            self.__prime = prime

        def _cost_is_missing(self):
            """Return True when the cost is None or NaN."""
            cost = self.__cost
            # None must be checked first: math.isnan(None) raises TypeError.
            return cost is None or math.isnan(cost)

        def has_all_properties(self):
            """Return True when name, gender and prime are truthy and a cost is present."""
            return bool(
                self.__name
                and not self._cost_is_missing()
                and self.__gender
                and self.__prime
            )

        def clean(self, wanted_cost, wanted_gender, wanted_prime):
            """Return True when the item matches the wanted gender and prime flag
            and its cost does not exceed `wanted_cost`.

            Items with an empty name or a missing cost never match.
            """
            return bool(
                self.__name
                and self.__gender == wanted_gender
                # Guard the comparison below: `None <= number` raises TypeError.
                and not self._cost_is_missing()
                and self.__cost <= wanted_cost
                and self.__prime == wanted_prime
            )

        def __eq__(self, other):
            """Compare field by field; defer to the other operand for non-Items."""
            if not isinstance(other, Item):
                # Lets Python fall back to its default handling, so that
                # `item == 5` is False instead of raising AttributeError.
                return NotImplemented
            return (
                self.__name == other.__name
                and self.__cost == other.__cost
                and self.__gender == other.__gender
                and self.__prime == other.__prime
            )

        def __hash__(self):
            """Hash of the four field values, consistent with __eq__."""
            return hash((self.__name, self.__cost, self.__gender, self.__prime))

        def __repr__(self):
            """Compact text form: Item(name,cost,gender,prime)."""
            return f"Item({self.__name},{self.__cost},{self.__gender},{self.__prime})"

        def tuple(self):
            """Return the fields as a (name, cost, gender, prime) tuple."""
            return self.__name, self.__cost, self.__gender, self.__prime
    
    def read_excel(filetype):
        """Load every file in the current directory whose name ends with `filetype`.

        Each matching file is read with pandas and reduced to the columns
        'name', 'cost', 'used_by' and 'prime'; the pieces are then stacked into
        one DataFrame with a fresh 0..n-1 index.

        Returns that DataFrame. When no file matches, an empty DataFrame that
        still carries the four column names is returned.

        Raises KeyError if a matching file lacks one of the four columns.
        """
        columns = ['name', 'cost', 'used_by', 'prime']
        cwd = os.path.abspath('')
        frames = [
            pd.read_excel(os.path.join(cwd, file))[columns]
            for file in os.listdir(cwd)
            if file.endswith(filetype)
        ]
        if not frames:
            # pd.concat raises ValueError on an empty list.
            return pd.DataFrame(columns=columns)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        df = pd.concat(frames, ignore_index=True)
        # Ask pandas to substitute None wherever a cell is null.
        return df.where(df.notnull(), None)
    
    def get_list(dataframe):
        """Return the rows of `dataframe` as a list of lists, printing them first."""
        rows = dataframe.values.tolist()
        print(rows)
        return rows
    
    def get_header(dataframe):
        """Return the column labels of `dataframe` as a list, printing them first."""
        column_names = [*dataframe.columns.values]
        print(column_names)
        return column_names
    
    def process(array, wanted_cost=20, wanted_gender="male", wanted_prime="yes"):
        """Deduplicate the rows, keep the complete ones that match the filter,
        and return them as tuples.

        array: iterable of rows, each unpacked into Item(name, cost, gender, prime).
        wanted_cost: highest cost an item may have to be kept.
        wanted_gender: gender value an item must have to be kept.
        wanted_prime: prime value an item must have to be kept.

        Returns a list of (name, cost, gender, prime) tuples in first-seen
        order. The deduplicated items and the kept items are printed.
        """
        # dict.fromkeys deduplicates through Item.__hash__/__eq__ like a set
        # does, but keeps insertion order, so the result is reproducible.
        unique_items = list(dict.fromkeys(Item(*row) for row in array))
        print(unique_items)
        result = [
            item
            for item in unique_items
            if item.has_all_properties()
            and item.clean(wanted_cost, wanted_gender, wanted_prime)
        ]
        print(result)
        return [item.tuple() for item in result]
    
        
    def output(where, sort_data, title):
        """Write `sort_data` to an Excel file.

        where: destination passed to DataFrame.to_excel.
        sort_data: row data handed to the DataFrame constructor.
        title: column names for the written sheet.
        """
        frame = pd.DataFrame(sort_data, columns=title)
        frame.to_excel(where, index=False, header=True)
     
    if __name__ == "__main__":
        # Script entry point: load the '.XLSX' files once, then derive both the
        # row list and the header from the same DataFrame.
        inputfile = read_excel('.XLSX')
        array = get_list(inputfile)
        header = get_header(inputfile)
        processdata = process(array)
        # output() has no return statement, so `result` is None.
        result = output('clean_data.xlsx', processdata, header)