Search code examples
python stream twitter-oauth tweepy

How do I find the number of times a word has been repeated in a tweepy stream?


I've recently been working with the tweepy library in Python. I wrote a program that streams through tweets. I've used strings as filters. I've assigned a certain variable to each string in a list depending on the mood the string represents.

    # Stream listener from the question: prints every status it receives.
    # No mood counting happens in this class.
    class listener(StreamListener):

    # Print the incoming status and return True.
    def on_status(self, status):
        try:
            print status
            return True
        except BaseException, e:
            # Reached only if printing the status raised: report the error,
            # then pause for 5 seconds.
            print 'failed on status, ', str(e)
            time.sleep(5)
            # NOTE(review): '' in <str> is always True, so only the second
            # test can filter anything; presumably it is meant to skip
            # retweets -- confirm that `in` works on a status object.
            if '' in status.text.lower() and 'retweeted_status' not in status:
                print status.text
                print status.coordinates

    # Print whatever error status the stream reports.
    def on_error(self, status):
        print status

# Authenticate and create the stream, using the listener class defined above.
auth = OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
twitterStream = Stream(auth, listener())

api = tweepy.API(auth)


# Phrases to track, grouped by the mood they are taken to represent.
happy=["I love","I'm so happy","feeling joyful","feeling awesome"]
sad=["I'm depressed","I'm very sad","It is painful","feeling terible"]
angry=["I'm furious","wtf!","I'm so pissed off","I'm angry"]
shocked=["rip","omg","I can't believe it","I'm shocked"]
romantic=["I love you","today is my anniversary","feeling romantic","I'm dating"]

# NOTE(review): this calls the return value of filter(track=sad) four more
# times; presumably filter() does not return a callable -- confirm.
twitterStream.filter(track=sad)(track=happy)(track=angry)(track=shocked)(track=romantic)

# Per-mood counters the question wants incremented.
# NOTE(review): they are assigned only after the filter() call above, and
# nothing in this snippet ever updates them.
mood_happy=0
mood_sad=0
mood_angry=0
mood_shocked=0
mood_romantic=0

What I want is this: each time a string from any of these lists appears in a streamed tweet, the matching mood_n counter should increase by 1. For example, if the phrase 'Feeling Joyful' appears 5 times in the stream, I would want mood_happy to be 5. How can I go about this?

I'm sorry, I'm aware that I'm not supposed to post such queries, but I've been searching for a solution on Google for many hours and haven't found a single bit of information on this. :(


Solution

  • There is room for optimization, but basically it looks like this:

    import time
    from tweepy import StreamListener, OAuthHandler, Stream
    
    
    # Twitter API credentials. The values are placeholders and must be
    # replaced with real keys before the script can authenticate.
    twitter_access = {
        "consumer_key"       : "enter_consumer_key",
        "consumer_secret"    : "enter_consumer_secret",
        "access_token"       : "enter_access_token",
        "access_token_secret": "enter_access_token_secret",
    }

    # Phrases to track, grouped by the mood they are taken to represent.
    # A tweet counts towards a mood when it contains any phrase of that list.
    happy = ["I love", "I'm so happy", "feeling joyful", "feeling awesome"]
    # "feeling terrible" was misspelled as "feeling terible", which made the
    # phrase match almost no real tweets.
    sad = ["I'm depressed", "I'm very sad", "It is painful", "feeling terrible"]
    angry = ["I'm furious", "wtf!", "I'm so pissed off", "I'm angry"]
    shocked = ["rip", "omg", "I can't believe it", "I'm shocked"]
    # NOTE: "I love you" also contains the happy phrase "I love", so such a
    # tweet is counted for both moods.
    romantic = ["I love you", "today is my anniversary", "feeling romantic", "I'm dating"]

    # Running per-mood tallies; Listener.on_status updates them as globals.
    mood_happy = 0
    mood_sad = 0
    mood_angry = 0
    mood_shocked = 0
    mood_romantic = 0
    
    
    class Listener(StreamListener):
        """Stream listener that tallies how many tweets mention each mood.

        The tallies are kept in the module-level counters ``mood_happy``,
        ``mood_sad``, ``mood_angry``, ``mood_shocked`` and ``mood_romantic``.
        A tweet adds at most 1 to each counter, no matter how many phrases
        of that mood it contains.
        """

        @staticmethod
        def _mentions_any(lowered_text, phrases):
            """Return True if any of *phrases* occurs in *lowered_text*.

            *lowered_text* must already be lower-cased; each phrase is
            lower-cased here so the comparison ignores case.
            """
            return any(phrase.lower() in lowered_text for phrase in phrases)

        def on_status(self, status):
            """Update the mood counters for one tweet and print the tallies.

            :param status: the received status; its ``text`` attribute is
                searched for the phrases in the module-level mood lists.
            """
            global mood_happy, mood_sad, mood_angry, mood_shocked, mood_romantic

            try:
                # Compare case-insensitively: the phrase lists mix cases
                # ("I love", "omg") and tweets rarely match them exactly.
                tweet_text = status.text.lower()

                if self._mentions_any(tweet_text, happy):
                    mood_happy += 1
                if self._mentions_any(tweet_text, sad):
                    mood_sad += 1
                if self._mentions_any(tweet_text, angry):
                    mood_angry += 1
                if self._mentions_any(tweet_text, shocked):
                    mood_shocked += 1
                if self._mentions_any(tweet_text, romantic):
                    mood_romantic += 1

                # Single-argument print() calls produce the same output on
                # Python 2 and Python 3.
                print('\n----------------')
                print('mood_happy: %s' % mood_happy)
                print('mood_sad: %s' % mood_sad)
                print('mood_angry: %s' % mood_angry)
                print('mood_shocked: %s' % mood_shocked)
                print('mood_romantic: %s' % mood_romantic)

            except Exception as e:
                # Catch Exception rather than BaseException so Ctrl-C and
                # SystemExit still stop the program.
                print('failed on status,  ' + str(e))
                time.sleep(5)
                # Dump the tweet for debugging unless it is a retweet. The
                # attribute check replaces a membership test on the status
                # object, which would itself have raised.
                if not hasattr(status, 'retweeted_status'):
                    print(getattr(status, 'text', None))
                    print(getattr(status, 'coordinates', None))

        def on_error(self, status):
            """Print the error status reported by the stream."""
            print(status)
    
    # Authenticate with the credentials from twitter_access and open a stream
    # that delivers every received status to a Listener instance.
    auth = OAuthHandler(twitter_access["consumer_key"], twitter_access["consumer_secret"])
    auth.set_access_token(twitter_access["access_token"], twitter_access["access_token_secret"])
    twitterStream = Stream(auth, Listener(), timeout=None)

    # Track every phrase in a single call. filter() runs the stream in the
    # calling thread by default and only returns when the stream ends, so
    # calling it once per list would never get past the first list.
    twitterStream.filter(track=happy + sad + angry + shocked + romantic)