Search code examples
python-2.7 · rest · loops · twitter · tweepy

Loop over multiple Twitter search queries REST API


I have a working REST Search API script that pulls tweets according to https://www.karambelkar.info/2015/01/how-to-use-twitters-search-rest-api-most-effectively./

Problem: This code works, but it pulls tweets for searchQuery1 and searchQuery2 together (e.g. tweets with Prostate Cancer + Colon Cancer). I don't want this. Instead, I would like to get all of the tweets from searchQuery1 (only tweets with Prostate Cancer), and then all of the tweets from searchQuery2 (only tweets with Colon Cancer). The queries should run separately.

Goal: Sequentially loop over X number of search queries (e.g. searchQuery1, searchQuery2, etc)

Thank you!

# Python 2 script: pages backwards through Twitter search results and appends
# each tweet's raw JSON, one per line, to a timestamped file.
# Uses `api`, `time`, `tweepy` and `jsonpickle`, none of which are defined in
# this snippet -- presumably set up earlier in the script.

# The two phrases the author wants searched separately.
searchQuery1 = 'Prostate Cancer'  
searchQuery2 = 'Colon Cancer' 


maxTweets = 10000  # the download loop stops once this many tweets were written
tweetsPerQry = 100  # passed as `count` to every api.search call
fprefix = 'REST' # first component of the output file name
sinceId = None  # never reassigned below, so the since_id branches are not taken
max_id = -1L  # Python 2 long literal; <= 0 means "no page fetched yet"


tweetCount = 0
with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f: # append to a file named <prefix>.<timestamp>.json
    while tweetCount < maxTweets: 
        try:

            # First request: no max_id is sent.
            if (max_id <= 0):
                if (not sinceId):
                    # zip() over two strings yields pairs of characters, not
                    # the two queries; x and y are unused, so the identical
                    # request is repeated once per character pair (12 times
                    # for these strings) and only the last result is kept.
                    for x,y in zip(searchQuery1,searchQuery2):
                        # Both phrases are handed to a single `q`, so the two
                        # queries are never run on their own -- this is the
                        # behaviour the question is about.
                        new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry)
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            since_id=sinceId)

            # Later requests: ask only for tweets with ids below the last one seen.
            else:
                if (not sinceId):
                    print "not sinceID 2"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1))
                else:
                    print "sinceID 1"
                    new_tweets = api.search(q=[searchQuery1, searchQuery2], count=tweetsPerQry,
                                            max_id=str(max_id - 1),
                                            since_id=sinceId)
            # An empty page ends the download.
            if not new_tweets:
                print("No more tweets found")
                break                 

            # One JSON document per line.
            for tweet in new_tweets: 
                f.write(jsonpickle.encode(tweet._json, unpicklable=False) +
                        '\n')


            tweetCount += len(new_tweets) 
            # Remember the id of the last tweet in this page; the next request
            # sends max_id - 1. Assumes results arrive newest first -- TODO confirm.
            max_id = new_tweets[-1].id

        except tweepy.TweepError as e:
            # Any API error stops the whole download.
            print("some error : " + str(e))
            break

# Note: this reports the file-name prefix, not the full path that was written.
print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))

Solution

  • searchQuery = ['Prostate Cancer', 'Colon Cancer']
    i = 0
    
    
    maxTweets = 1000
    tweetsPerQry = 100  
    fprefix = 'REST' 
    language = ['en']
    
    sinceId = None
    max_id = -1L
    
    tweetCount = 0
    print("Downloading max {0} tweets".format(maxTweets))
    with open('/Users/eer/Desktop/' + fprefix + '.' + time.strftime('%Y-%m-%d_%H-%M-%S') + '.json', 'a+') as f: 
        while tweetCount < maxTweets: 
            try:
                if (max_id <= 0):
                    if (not sinceId):
    
                        for search in searchQuery:
                            new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry, languages=language)
    
                    else:
                        for search in searchQuery:
                            new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                since_id=sinceId, languages=language)
    
                else:
                        print "not sinceID 2"
                        for search in searchQuery:
                            new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                max_id=str(max_id - 1),languages=language)
                    else:
    
                        for search in searchQuery:
                            new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry,
                                                max_id=str(max_id - 1),
                                                since_id=sinceId, languages=language)
                if not new_tweets:
                    print("No more tweets found; checking next query")
                    i = i + 1
    
                    try:
                        for search in searchQuery:
                            new_tweets = api.search(q=searchQuery[i], count=tweetsPerQry, languages=language)
                    except IndexError:
                        break
    
                for tweet in new_tweets:         
                    f.write(jsonpickle.encode(tweet._json, unpicklable=False) +
                            '\n')
    
                tweetCount += len(new_tweets) 
                print("Downloaded {0} tweets".format(tweetCount))
                max_id = new_tweets[-1].id
    
            except tweepy.TweepError as e:
                print("some error : " + str(e))
                break
    
    print ("Downloaded {0} tweets, Saved to {1}".format(tweetCount, fprefix))