python, pandas, tokenize

How to iterate a function with strings over a pandas dataframe


I want to get the Jaccard similarity between each row of my dataframe and the base. The issue is I need it for 500+ rows, and I either get the error "too many values to unpack", the error "'Series' object has no attribute 'iterrows'", or the function compares the base with the dataframe as a whole instead of row by row.

Alternative A:

sentences = pd.Series(df.sentence)
sentences = sentences.str.replace('[^A-z ]','').str.replace(' +',' ').str.strip()
splitwords = [ nltk.word_tokenize( str(sentence) ) for sentence in sentences ]
print(splitwords)
sentence = df.sentence
def Jaccard_Similarity(base, sentence):
    for i, row in sentence.iterrows():
        a = set(word for word in base)
        b = set(word for word in df.sentence())
        c = a.intersection(b)
        return(float(len(c)) / (len(a) + len(b) - len(c)), a, b)
Jaccard_Similarity(base, sentence)

Alternative B:

df = df.apply(lambda row: nltk.word_tokenize(row['sentence']), axis=1)
print(df)

def Jaccard_Similarity(bas, df):
    for row in df.iterrows(df):
        a = set(word for word in base)
        b = set(word for word in df)
        c = a.intersection(b)
        return(float(len(c)) / (len(a) + len(b) - len(c)), a, b)
Jaccard_Similarity(base, df)

Data:

base = ['Tom', 'eats', 'apple']    
df = pd.DataFrame([["Tom eats an apple"],
                   ["Tom eats a pineapple"],
                   ["Eva eats an apple"],
                   ["Eva eats a pineapple"]],
                  columns=['sentence'])

EDIT:

def Jaccard_Similarity(base, sentence):
    base = set(base.lower().split())
    sentence = set(sentence.lower().split())
    intersection = base.intersection(sentence)
    union = base.union(sentence)
    return float(len(intersection)) / len(union)
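
This works when I pass it two plain strings (joining the base list into one string), e.g.:

Jaccard_Similarity('Tom eats apple', 'Tom eats an apple')  # returns 0.75

but I can't work out how to apply it to every row of the dataframe.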

Solution

  • Try this - tokenize each sentence, lowercase the tokens, drop the English stopwords, and then compare each row's token set with the base set (Jaccard = size of intersection / size of union):

    import nltk
    import numpy as np
    import pandas as pd
    from nltk.corpus import stopwords  # to remove stopwords
    # if needed on first run: nltk.download('punkt'); nltk.download('stopwords')

    base = ['Tom', 'eats', 'apple']
    base = [item.lower() for item in base]
    stop_words = set(stopwords.words('english'))
    list1 = [["Tom eats an apple"],
             ["Tom eats a pineapple"],
             ["Eva eats an apple"],
             ["Eva eats a pineapple"]]
    df = pd.DataFrame(list1, columns=['sentence'])

    # tokenize each sentence, lowercase the tokens and drop stopwords
    df = df.sentence.apply(nltk.word_tokenize)
    df = df.apply(
        lambda x: [item.lower() for item in x if item.lower() not in stop_words]
    )

    # b: set of tokens per row, a: set of tokens in the base
    b = df.apply(set)
    a = set(base)
    # c: intersection of the base set with each row's set
    c = b.apply(lambda x: a.intersection(x))

    # Jaccard similarity = |c| / (|a| + |b| - |c|), i.e. intersection / union
    len_a_b = b.apply(lambda x: len(x) + len(a))
    len_c = c.apply(lambda x: len(x))
    dict1 = {'length': len_c / (len_a_b - len_c), 'b': b, 'c': c}
    df = pd.DataFrame(dict1)
    df['a'] = np.nan
    df['a'] = df.a.apply(lambda x: a)  # repeat the base set in every row for reference
    print(df)
    
    

    output -

       length                       b                   c                   a
    0     1.0      {apple, eats, tom}  {apple, eats, tom}  {apple, eats, tom}
    1     0.5  {eats, tom, pineapple}         {eats, tom}  {apple, eats, tom}
    2     0.5      {apple, eats, eva}       {apple, eats}  {apple, eats, tom}
    3     0.2  {eats, pineapple, eva}              {eats}  {apple, eats, tom}
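
    If you prefer one named helper over the chained apply calls, the same length column can be computed row by row. This is a small sketch reusing the a (base set) and b (per-row token sets) from above - the jaccard helper is just a plain function for illustration, not a pandas method:

    def jaccard(row_set, base_set):
        # Jaccard similarity = |intersection| / |union|
        intersection = row_set & base_set
        union = row_set | base_set
        return len(intersection) / len(union)

    scores = b.apply(lambda s: jaccard(s, a))
    print(scores)  # 1.0, 0.5, 0.5, 0.2 - same values as the length column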