As the title says, I have been trying hard to figure out how to save tweets using tweepy in Python 3.6. I found a solution that saves English tweets correctly, but it does not work for Arabic ones. Does anyone have any idea how to do this?
The output I get in the CSV file for Arabic tweets looks like this:
1510123361.875904::\u0623\u0639\u0648\u0630 \u0628\u0643\u0644\u0645\u0627\u062a \u0627\u0644\u0644\u0647 \u0627\u0644/FMsjMi2nvF
Thank you in advance.
This is my code
# Translation table that maps every code point above U+FFFF to U+FFFD.
non_bmp_map = {code_point: 0xfffd for code_point in range(0x10000, sys.maxunicode + 1)}
# Output file used by the rest of the script; opened once here for writing.
save = open('ExampleNumber4.csv', 'w', encoding="utf8", newline=None)
class listener(StreamListener):
    """Stream listener that prints each tweet's text and appends it to ``save``.

    Uses the module-level ``save`` file object and ``non_bmp_map``
    translation table.
    """

    def on_data(self, data):
        """Handle one raw JSON payload received from the stream.

        Args:
            data: JSON document as a string.

        Returns:
            True, whether the payload was written or skipped.
        """
        try:
            tweet = json.loads(data)['text']
        except KeyError:
            # Payloads without a 'text' key are skipped.
            return True

        # Only the printed copy has characters above U+FFFF replaced.
        print(tweet.translate(non_bmp_map))

        # Write the decoded text. Cutting the text out of the raw payload
        # with str.split() keeps JSON escapes such as \u0623 verbatim, which
        # is what put escaped Arabic into the output file. It also raises
        # IndexError when the expected delimiters are absent.
        save.write(str(time.time()) + "::" + tweet)
        save.write("\n\n")
        return True

    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
# Authenticate, then stream tweets matching the tracked keyword to the listener.
auth = OAuthHandler(ConsumerKey, ConsumerSecret)
auth.set_access_token(AccessToken, AccessTokenSecret)
twitterStream = Stream(auth, listener())
try:
    twitterStream.filter(track=[u'سيارة'])
finally:
    # Close the output file even if the stream raises or is interrupted,
    # so buffered tweets are flushed to disk.
    save.close()
Here's a working solution. Next time, please post a minimal working example that reproduces the error in your question: include some sample JSON data and leave out the Twitter code, which we can't run as is.
#coding:utf8
import sys
import json
import time
import csv
# Sample payload: ASCII-only JSON in which the non-ASCII characters are
# written as \uXXXX escapes (the emoji as a surrogate pair).
data = r'{"text": "\u0633\u064a\u0627\u0631\u0629\ud83d\ude00"}' # ASCII JSON
# data = '{"text": "سيارة😀"}' # non-ASCII JSON

# Translation table that maps every code point above U+FFFF to U+FFFD; it is
# applied only to the text that is printed, not to what is written to the file.
non_bmp_map = {code_point: 0xfffd for code_point in range(0x10000, sys.maxunicode + 1)}

# newline='' is the setting the csv module documents for files it writes to.
with open('ExampleNumber4.csv', mode='w', encoding="utf-8-sig", newline='') as save:
    writer = csv.writer(save)
    # Decode the JSON first so the escapes become real characters.
    payload = json.loads(data)
    tweet = payload['text']
    print(tweet.translate(non_bmp_map))
    # One CSV row per tweet: timestamp, then the decoded text.
    row = [time.time(), tweet]
    writer.writerow(row)
Output:
1510208283.7488384,سيارة�