I am trying to add a column of one-word categories derived from another column that contains a sentence in each row.
I tried the following code, but it kept giving me errors:
def loan_cat(row):
    """Return a one-word loan category for a single DataFrame row.

    Meant to be used as ``df.apply(loan_cat, axis=1)``, which calls this
    function once per row. The category is picked from the row's
    ``'purpose'`` text by case-sensitive substring matching; the first
    matching keyword wins.

    Args:
        row: A mapping-like row (for example a pandas Series) that has a
            ``'purpose'`` key.

    Returns:
        One of ``'house'``, ``'education'``, ``'wedding'``, ``'car'``,
        ``'real estate'``, ``'property'``, or ``'undefined'`` when no
        keyword matches or the purpose is not a string.
    """
    # apply(..., axis=1) already hands over one row at a time, so read the
    # value from `row` instead of looping over the whole frame again
    # (iterating over the integer returned by .count() raises TypeError).
    data = row['purpose']

    # A non-string purpose (e.g. a missing value stored as NaN) cannot be
    # searched with `in`; treat it as uncategorized instead of raising.
    if not isinstance(data, str):
        return 'undefined'

    if 'house' in data:
        return 'house'
    # Each keyword needs its own membership test: `'a' | 'b' in data`
    # raises TypeError because `|` is not defined between strings.
    if 'education' in data or 'university' in data:
        return 'education'
    if 'wedding' in data:
        return 'wedding'
    if 'car' in data:
        return 'car'
    if 'real' in data:
        return 'real estate'
    if 'property' in data:
        return 'property'
    return 'undefined'
# Call loan_cat once per row (axis='columns' is the spelled-out form of
# axis=1) and store the returned category in a new column.
df['purpose_1'] = df.apply(loan_cat, axis='columns')
Is there a better way to analyze and categorize the data?
I figured out the answer:
# Ordered (keyword, category) pairs; earlier entries take precedence when a
# purpose text contains more than one keyword.
_PURPOSE_CATEGORIES = (
    ('hous', 'House'),
    ('educ', 'Education'),
    ('university', 'Education'),
    ('wedding', 'Wedding'),
    ('car', 'Car'),
    ('real', 'Real Estate'),
    ('property', 'Property'),
)


def loan_cat(value):
    """Map a free-text loan purpose to a short category label.

    Matching is a case-sensitive substring test against each keyword in
    ``_PURPOSE_CATEGORIES``, in order; the first keyword found decides the
    category.

    Args:
        value: The purpose text for one row.

    Returns:
        ``'House'``, ``'Education'``, ``'Wedding'``, ``'Car'``,
        ``'Real Estate'``, ``'Property'``, or ``'undefined'`` when no
        keyword is found or ``value`` is not a string.
    """
    # Non-string cells (None, or NaN for a missing value) do not support
    # substring tests; report them as uncategorized rather than letting a
    # TypeError abort the whole apply().
    if not isinstance(value, str):
        return 'undefined'

    for keyword, category in _PURPOSE_CATEGORIES:
        if keyword in value:
            return category
    return 'undefined'
# Series.apply accepts the function itself, so no lambda wrapper is needed:
# loan_cat is called once for every value in the 'purpose' column.
df['purpose_cat'] = df['purpose'].apply(loan_cat)
# Show how many rows fell into each category.
print(df['purpose_cat'].value_counts())