Search code examples
pythonpython-3.xtwitterarabictweepy

I can't save an Arabic tweet using tweepy in python 3.6


As the title says, I have tried hard to figure out how to save tweets using tweepy in Python 3.6. I found a solution that lets me save tweets in English, but it does not work for Arabic. Does anyone have any idea how to do this?

the output I get in the CSV file for Arabic tweets is like this

1510123361.875904::\u0623\u0639\u0648\u0630 \u0628\u0643\u0644\u0645\u0627\u062a \u0627\u0644\u0644\u0647 \u0627\u0644/FMsjMi2nvF

Thank you in advance.

This is my code

# Translation table for str.translate: every code point above U+FFFF (i.e.
# outside the Basic Multilingual Plane) maps to U+FFFD, the replacement
# character.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

# Output file written by listener.on_data; opened once in UTF-8 text mode and
# closed at the very end of the script.
save = open('ExampleNumber4.csv', mode='w', encoding="utf8", newline=None)


class listener(StreamListener) :
    """Stream callbacks that print each tweet's text and append it to `save`."""

    def on_data (self , data):
        """Handle one raw stream message.

        `data` is parsed with json.loads, so it is expected to be a JSON
        string. Returns True after writing a record; returns None (implicitly)
        when the message has no 'text' key.
        """
        try:
            # Decoded text: json.loads turns \uXXXX escapes into real characters.
            tweet = json.loads(data)['text']
            # Replace non-BMP characters (e.g. emoji) with U+FFFD before printing.
            print(tweet.translate(non_bmp_map))
            # NOTE: this rebinds `tweet` to a slice of the *raw* JSON string, so
            # any \uXXXX escape sequences in it are kept as literal text. This is
            # what ends up in the file instead of the decoded characters.
            tweet = data.split(',"text":"')[1].split('","source')[0]
            # Record format: "<unix timestamp>::<tweet>" followed by a blank line.
            savefile = str(time.time()) + "::" + tweet
            save.write(savefile)
            save.write("\n\n")
            return (True)

        except KeyError:
            # Messages without a 'text' key are skipped silently. Only KeyError
            # is handled here; an IndexError from the split above would propagate.
            pass


    def on_error(self , status):
        """Print the status value passed to the error callback."""
        print(status)


# Credentials (ConsumerKey, ConsumerSecret, AccessToken, AccessTokenSecret) are
# not defined in this snippet; they must be provided elsewhere.
auth = OAuthHandler (ConsumerKey , ConsumerSecret)
auth.set_access_token(AccessToken , AccessTokenSecret)
# Stream messages are handed to a fresh `listener` instance.
twitterStream = Stream(auth , listener())
# Track the Arabic word for "car".
# NOTE(review): presumably this call blocks while the stream is open - confirm.
twitterStream.filter(track=[u'سيارة'])
# Runs only after filter() returns.
save.close()

Solution

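  • Here's a working solution. The original code wrote a slice of the raw JSON string, in which non-ASCII characters are still \uXXXX escapes; writing the text decoded by json.loads through csv.writer stores the actual characters instead. Next time, please post a working example that reproduces the error in your question: include some sample JSON data and leave out the Twitter code, which we can't run as is.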

    #coding:utf8
    import sys
    import json
    import time
    import csv
    
    # Sample payload standing in for one stream message. The JSON is pure
    # ASCII: every non-ASCII character is written as a \uXXXX escape.
    data = r'{"text": "\u0633\u064a\u0627\u0631\u0629\ud83d\ude00"}' # ASCII JSON
    # data = '{"text": "سيارة😀"}'  # non-ASCII JSON
    
    # str.translate table: any code point outside the Basic Multilingual Plane
    # is swapped for U+FFFD (the replacement character) when printing.
    astral_code_points = range(0x10000, sys.maxunicode + 1)
    non_bmp_map = dict.fromkeys(astral_code_points, 0xfffd)
    
    # newline='' lets the csv module control line endings itself.
    with open('ExampleNumber4.csv', mode='w', encoding="utf-8-sig", newline='') as save:
        # json.loads decodes the escapes, so `tweet` holds the real characters.
        tweet = json.loads(data)['text']
        print(tweet.translate(non_bmp_map))
        # One row per tweet: [timestamp, text]; the unfiltered text is stored.
        csv.writer(save).writerow([time.time(), tweet])
    

    Output:

    1510208283.7488384,سيارة�