I am trying to create a new DataFrame column that contains the words appearing both in a list of keywords and in the strings of an existing DataFrame column...
# Words to search for in each opinion.
keywords = ['bread', 'bologna', 'toast', 'sandwich']

# Sample data: a single free-text column of sandwich opinions.
data = {
    'Sandwich Opinions': [
        'Roast beef is overrated',
        'Toasted bread is always best',
        'Hot sandwiches are better than cold',
    ],
}
df = pd.DataFrame(data=data)
# Attempt under discussion (only the unbalanced bracket and the column-label
# typo are repaired; the logic is left as asked about).
# NOTE(review): the lambda ignores its row argument `x` and, for every row,
# re-splits and scans the entire 'Sandwich Opinions' column, so the work grows
# quadratically with the number of rows. Each `i` is also a whole list of words
# rather than a single word, so `i in keywords` does not match string keywords.
df['Matches'] = df.apply(lambda x: ' '.join([i for i in df['Sandwich Opinions'].str.split() if i in keywords]), axis=1)
This seems like it should do the job, but it gets stuck processing and never finishes.
# Add one temporary 0/1 column per keyword, flagging the rows whose opinion
# text contains that keyword as a substring (case-sensitive, so 'sandwiches'
# matches 'sandwich' but 'Toasted' does not match 'toast').
for kw in keywords:
    # regex=False: treat the keyword as literal text, not a regex pattern.
    # na=False: a missing opinion counts as "no match" instead of yielding NaN,
    # which np.where would otherwise treat as truthy.
    df[kw] = np.where(
        df['Sandwich Opinions'].str.contains(kw, regex=False, na=False), 1, 0
    )
def add_contain_row(row):
    """Return the keywords whose indicator is set for this row.

    ``row`` is indexed by keyword (e.g. a DataFrame row passed by
    ``df.apply(..., axis=1)``); a keyword is included when ``row[kw] == 1``.
    The result is a list in the order of the module-level ``keywords``.
    """
    return [kw for kw in keywords if row[kw] == 1]
# Collect, for each row, the list of keywords flagged in its indicator columns.
df['contains'] = df.apply(add_contain_row, axis='columns')
# Optional cleanup: remove the temporary per-keyword indicator columns.
df.drop(keywords, axis='columns', inplace=True)