I am working with a pandas DataFrame and I want to convert it to an Orange data table in order to impute missing values. My DataFrame looks like this:
locationId rank Rating type value
1 1 10 shop 2.668
2 4 8 store 3.921
3 3 NAN shop 3.122
where rank is ordinal, with repeating values between 1 and 5; type is categorical, with values such as shop, store, etc.; Rating is an integer; and value is a floating-point number. I want to convert this into an Orange data table and impute the missing values. I have also looked at this, but these functions give me an error and did not work for me.
import numpy as np
import pandas as pd
import Orange
import csv
from io import StringIO
from collections import OrderedDict
from Orange.data import Table, Domain, ContinuousVariable, DiscreteVariable
def pandas_to_orange(df):
    """Convert a pandas DataFrame into an ``Orange.data.Table``.

    The domain is derived by ``construct_domain``: numeric columns become
    attributes (continuous or discrete) and every other column becomes a
    string meta.

    Args:
        df: The pandas DataFrame to convert.

    Returns:
        An ``Orange.data.Table`` with one row per row of *df*.
    """
    domain, attributes, metas = construct_domain(df)

    # Assemble X as a float matrix column by column.  Continuous columns are
    # copied as-is (NaN stays NaN, i.e. a missing value).  Discrete columns
    # must hold the *position* of each value in the variable's ``values``
    # list, not the raw value itself: passing the raw numbers through would
    # mislabel any column whose codes do not start at 0 (e.g. a 1..5 rank).
    X = np.empty((len(df), len(attributes)), dtype=float)
    for col, (name, var) in enumerate(zip(attributes, domain.attributes)):
        column = df[name]
        if isinstance(var, Orange.data.DiscreteVariable):
            index_of = {value: i for i, value in enumerate(var.values)}
            # ``values`` are strings, so compare on the string form; anything
            # not listed (including missing entries) maps to NaN.
            codes = column.astype(str).map(index_of)
            X[:, col] = codes.to_numpy(dtype=float)
        else:
            X[:, col] = column.to_numpy(dtype=float)

    return Orange.data.Table.from_numpy(
        domain=domain,
        X=X,
        Y=None,
        metas=df[metas].values,
        W=None,
    )
def construct_domain(df):
    """Build an Orange ``Domain`` describing the columns of *df*.

    Column handling, in column order:

    * Non-numeric columns become ``StringVariable`` metas.
    * Numeric columns become ``ContinuousVariable`` attributes when they are
      floating point, have 13 or more distinct values, or have a maximum
      larger than their number of distinct values.
    * All remaining numeric columns become ``DiscreteVariable`` attributes
      whose values are the distinct column values, in ascending numeric
      order, rendered as strings.

    *df* itself is left unmodified.

    Args:
        df: The pandas DataFrame to describe.

    Returns:
        A tuple ``(domain, attribute_names, meta_names)`` where the two name
        lists give the DataFrame columns backing the domain's attributes and
        metas respectively.
    """
    # Columns with at least this many distinct values are never discrete.
    continuous_threshold = 13

    attributes = OrderedDict()
    metas = OrderedDict()
    for name, dtype in OrderedDict(df.dtypes).items():
        if not issubclass(dtype.type, np.number):
            metas[name] = Orange.data.StringVariable(name)
            continue

        column = df[name]
        n_distinct = len(column.unique())
        is_continuous = (
            n_distinct >= continuous_threshold
            or issubclass(dtype.type, np.inexact)
            or column.max() > n_distinct
        )
        if is_continuous:
            attributes[name] = Orange.data.ContinuousVariable(name)
        else:
            # Sort numerically *before* converting to strings so that, e.g.,
            # "10" does not sort ahead of "2".  The caller's column is not
            # overwritten with its string form.
            levels = sorted(column.dropna().unique().tolist())
            attributes[name] = Orange.data.DiscreteVariable(
                name, values=[str(level) for level in levels]
            )

    domain = Orange.data.Domain(
        attributes=list(attributes.values()),
        metas=list(metas.values()),
    )
    return domain, list(attributes.keys()), list(metas.keys())
Usage: pandas_to_orange(dataFrame)