The data set is below:
storeid,revenue,profit,country
101,11434,2345,IN
101,12132,3445,US
102,21343,4545,CH
103,34423,3432,CH
103,43435,3234,JP
103,34345,3335,IN
The code is below:
import pandas as pd
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from pylab import rcParams
from collections import Counter
# Plot styling: default figure size and a white grid background.
sns.set(rc={'figure.figsize': (11.7, 8.27)})
sns.set_style('whitegrid')

# Load the store data and strip spaces from the column names.
df = pd.read_csv('1.csv', index_col=None)
df.head()  # only displays a preview in an interactive session
df.columns = df.columns.str.replace(' ', '')

# One-hot encode the non-numeric columns (here: country).
dummies = pd.get_dummies(data=df)
# Drop the saved-index column when the CSV has one; a plain `del` would
# raise KeyError for a file that was written without it.
dummies = dummies.drop(columns='Unnamed:0', errors='ignore')

model = DBSCAN(eps=2.25, min_samples=19).fit(dummies)
print(model)

target = dummies.iloc[:, 0]
# NOTE(review): the 1:-1 slice skips the first column and also the last
# one, so the final dummy column is not part of `data` -- confirm intended.
data = dummies.iloc[:, 1:-1]
outliers_df = pd.DataFrame(data)

print(Counter(model.labels_))
# Select rows with a boolean mask using [] -- calling the frame with ()
# raises TypeError: 'DataFrame' object is not callable.
# Rows labelled -1 are the ones DBSCAN did not assign to any cluster.
print(outliers_df[model.labels_ == -1])
The line `print(outliers_df(model.labels_==-1))` throws `TypeError: 'DataFrame' object is not callable`.
Use boolean indexing with square brackets `[]` (not parentheses) to filter by the mask:
# Index with [] and the boolean mask to keep only the rows labelled -1.
print(outliers_df[model.labels_==-1])
revenue profit country_CH country_IN country_JP
0 11434 2345 0 1 0
1 12132 3445 0 0 0
2 21343 4545 1 0 0
3 34423 3432 1 0 0
4 43435 3234 0 0 1
5 34345 3335 0 1 0