I am getting an error and I don't know what exactly I should do?!
The error message:
File "pandas_libs\writers.pyx", line 55, in pandas._libs.writers.write_csv_rows
UnicodeEncodeError: 'ascii' codec can't encode character u'\u2026' in position 147: ordinal not in range(128)
import numpy as np
import pandas as pd
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import subjectivity
from nltk.sentiment import SentimentAnalyzer
from nltk.sentiment.util import *
import matplotlib.pyplot as mlpt
import tweepy
import csv
import pandas as pd
import random
import numpy as np
import pandas as pd
import re
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth,wait_on_rate_limit=True)
fetch_tweets=tweepy.Cursor(api.search, q="#unitedAIRLINES",count=100, lang ="en",since="2018-9-13", tweet_mode="extended").items()
data=pd.DataFrame(data=[[tweet_info.created_at.date(),tweet_info.full_text]for tweet_info in fetch_tweets],columns=['Date','Tweets'])
data.to_csv("Tweets.csv")
cdata=pd.DataFrame(columns=['Date','Tweets'])
total=100
index=0
for index,row in data.iterrows():
stre=row["Tweets"]
my_new_string = re.sub('[^ a-zA-Z0-9]', '', stre)
cdata.sort_index()
cdata.set_value(index,'Date',row["Date"])
cdata.set_value(index,'Tweets',my_new_string)
index=index+1
#print(cdata.dtypes)
cdata
PANDAS is tripping up on handling Unicode data, presumably in generating a CSV output file.
One approach, if you don't really need to process Unicode data, is to simply make conversions on your data to get everything ASCII.
Another approach is to make a pass on your data prior to generating the CSV output file to get the UTF-8 encoding of any non-ASCII characters. (You may need to do this at the cell level of your spreadsheet data.)
I'm assuming Python3 here...
>>> s = "one, two, three, \u2026"
>>> print(s)
one, two, three, …
>>> ascii = str(s.encode("utf-8"))[2:-1]
>>> ascii
'one, two, three, \\xe2\\x80\\xa6'
>>> print(ascii)
one, two, three, \xe2\x80\xa6
See also: help()
on codecs
module.