I want to save arabic tweets with all related data such as tweet id, user location, user followers etc.
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
import json
import sys
import codecs
import time
ConsumerKey = ' '
ConsumerSecret = ' '
AccessToken = ' '
AccessTokenSecret = ' '
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)
class StdOutListener(StreamListener):
def on_data(self, data):
try:
all_data = json.loads(data)
tweet = all_data["text"]
print(tweet.translate(non_bmp_map))
SaveFile = codecs.open('ArabictwitDB.txt','a', "utf-8")
SaveFile.write(all_data)
SaveFile.write('\n')
SaveFile.close()
return True
except BaseException, e:
print 'failed',str(e)
time.sleep(5)
def on_error(self, status):
print status
if __name__ == '__main__':
l = StdOutListener()
auth = OAuthHandler(ConsumerKey, ConsumerSecret)
auth.set_access_token(AccessToken, AccessTokenSecret)
stream = Stream(auth, l)
stream.filter(track=[unicode("رمضان","utf-8")])
it shows me this error
failed coercing to Unicode: need string or buffer, dict found
but if I replace all_data
SaveFile.write(all_data)
with tweet
SaveFile.write(tweet)
it save only the arabic text How can I save all data into a file ??
If you don't need to format it, simply
SaveFile.write(json.dumps(all_data))
but since you didn't change anything.. why dont just save the data
instead?
or you can access each value like you did with the tweet