Search code examples
pythonmissing-dataorange

convert pandas dataframe to Orange data table


I am working with a pandas DataFrame and I want to convert it to an Orange data table in order to impute missing values. My DataFrame looks like this:

locationId rank Rating type  value
1          1    10     shop   2.668
2          4    8      store  3.921
3          3    NAN    shop   3.122

where rank is ordinal with repeating values between 1 and 5, type is categorical with values such as shop, store, etc., Rating is an integer, and value is a floating-point number. I want to convert it into an Orange data table and impute the missing values. I have also looked at this, but those functions gave me an error and did not work for me.


Solution

  • import numpy as np
    import pandas as pd
    import Orange
    import csv
    from io import StringIO
    from collections import OrderedDict
    from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable
    
    
    def pandas_to_orange(df):
        """Convert a pandas DataFrame into an ``Orange.data.Table``.

        The domain is derived with ``construct_domain``: numeric columns become
        attributes (the ``X`` matrix) and all remaining columns become string
        metas.  No class variable and no instance weights are set.

        Orange stores a discrete value as the index of its label inside
        ``variable.values``, so discrete columns are translated from their
        labels to those indices before the table is built.  Passing the raw
        column values instead would select the wrong label (or an out-of-range
        one) whenever the column is not already a 0-based contiguous code.

        Args:
            df: The DataFrame to convert.

        Returns:
            The resulting ``Orange.data.Table``.
        """
        domain, attributes, metas = construct_domain(df)

        # Encode on a copy so the caller's DataFrame is left as it was.
        features = df[attributes].copy()
        for column, variable in zip(attributes, domain.attributes):
            if isinstance(variable, Orange.data.DiscreteVariable):
                codes = {label: code for code, label in enumerate(variable.values)}
                # Labels were produced by string conversion, so compare as strings;
                # anything without a label maps to NaN, i.e. a missing value.
                features[column] = features[column].astype(str).map(codes)

        return Orange.data.Table.from_numpy(
            domain=domain,
            X=features.values.astype(float),
            Y=None,
            metas=df[metas].values,
            W=None,
        )
    
    def construct_domain(df, max_discrete_values=12):
        """Build an Orange ``Domain`` describing the columns of *df*.

        Numeric columns become attributes; every other column becomes a
        ``StringVariable`` meta.  A numeric column is treated as continuous when
        any of the following holds:

        * its dtype is inexact (floating point or complex),
        * it has more than *max_discrete_values* distinct non-missing values,
        * its maximum is larger than its number of distinct values.

        Otherwise it becomes a ``DiscreteVariable`` whose labels are the
        distinct values rendered as strings, in ascending numeric order.

        *df* is only read, never modified.

        Args:
            df: The DataFrame whose columns are to be described.
            max_discrete_values: Largest number of distinct values a numeric
                column may have and still be considered discrete.

        Returns:
            A tuple ``(domain, attribute_names, meta_names)`` where the two name
            lists hold the DataFrame column labels in domain order.
        """
        attributes = OrderedDict()
        metas = OrderedDict()

        for name, dtype in df.dtypes.items():
            if not issubclass(dtype.type, np.number):
                metas[name] = Orange.data.StringVariable(name)
                continue

            column = df[name]
            # Computed once per column; missing entries are not a category.
            distinct = column.dropna().unique()
            is_continuous = (
                issubclass(dtype.type, np.inexact)
                or len(distinct) > max_discrete_values
                or column.max() > len(distinct)
            )
            if is_continuous:
                attributes[name] = Orange.data.ContinuousVariable(name)
            else:
                # Sort numerically first, then stringify, so "10" follows "9"
                # instead of landing between "1" and "2".
                labels = [str(value) for value in sorted(distinct.tolist())]
                attributes[name] = Orange.data.DiscreteVariable(name, values=labels)

        domain = Orange.data.Domain(
            attributes=list(attributes.values()),
            metas=list(metas.values()),
        )

        return domain, list(attributes.keys()), list(metas.keys())
    

    Usage: orange_table = pandas_to_orange(dataFrame)