I am currently trying to clean a csv of Twitter data as part of a pre-processing step. I currently get the error:
Traceback (most recent call last):
File "...main.py", line 192, in <module>
obj = TwitterApplication(root)
File "...main.py", line 88, in __init__
twitter = self.filterTweetList(df['text'])
TypeError: filterTweetList() takes 1 positional argument but 2 were given
I saw a similar question asked (TypeError: method() takes 1 positional argument but 2 were given) with the main suggestion being to add "self" as the first argument yet I still end up with this error.
The error appears at twitterData = self.filterTweetList(df['text'])
.
My current code:
class MainApplication:
def __init__(self, root):
df = pd.read_csv("test.csv")
if len(df) == 0:
msg.showinfo('No Rows Selected')
else:
twitterData = self.filterTweetList(df['text'])
print(twitterData[1]) # Call in first cleaned tweet
# Functions for data cleaning
def removePunc(myWord):
"""Function to remove punctuation from string inputs"""
if myWord is None:
return myWord
else:
return re.sub('[.:;()/!&-*@$,?^\d+]', '', myWord)
def removeAscii(myWord):
"""Function to remove ascii from string input"""
if myWord is None:
return myWord
else:
return str(re.sub(r'[^\x00-\x7F]+', '', myWord.decode('utf-8').strip()))
def lemmatize(myWord):
wnl = WordNetLemmatizer()
"""Function to lemmatize words"""
if myWord is None:
return myWord
else:
return str(wnl.lemmatize(myWord))
def removeStopWords(myWord):
"""Function to remove stop words"""
newStopWords = ['amp', 'rt', 'https', 'http']
stopwords.append(newStopWords)
if myWord is None:
return myWord
if myWord not in str(stopwords.words('english')):
return myWord
def removeLinkUser(myWord):
"""Function to remove web addresses and twitter handles"""
if not myWord.startswith('@') and not myWord.startswith('http'):
return myWord
def prepText(myWord):
"""Final text pre-processing function"""
return myWord.removeStopWords(
myWord.lemmatize(
myWord.removeAscii(
myWord.removePunc(
myWord.removeLinkUser(
myWord.lower()
)
)
)
)
)
def filterTweetList(tweetList):
"""Remove stop words, lemmatize, and clean all tweets"""
return [[tweetList.prepText(word) for word
in tweet.split()
if tweetList.prepText(word) is not None]
for tweet in tweetList]
In python, every method within a class takes self
at the first argument. So simply replace def removePunc(myWord)
with def removePunc(self, myWord)
and continue that for all of the methods within the class.