Search code examples
rtwitterpaginationsocial-networkingrtweet

How to get all the Twitter follower ids (> 75000) of a user using pagination in R with the rtweet library?


I need a function that returns, in a data.frame, all the follower (or friend) ids of a user who has more than 75000 followers (or friends), i.e. more than the Twitter API limit, using the rtweet library.

I first tried the next_cursor documentation help example:

# Example adapted from the next_cursor() help page: fetch follower ids in
# two batches, pausing between them because of the API rate limit.

# Retrieve user ids of accounts following POTUS
f1 <- get_followers("potus", n = 75000)
page <- next_cursor(f1) # cursor marking where the first batch stopped

# One token can return at most 75,000 ids every 15 minutes, so wait
# before collecting the next batch of ids.
Sys.sleep(15 * 60) # suspend execution of R expressions for 15 mins

# Pass the value returned by next_cursor() as `page` to continue
# where the first request left off.
f2 <- get_followers("potus", n = 75000, page = page)

But, how can I get all the followers (or friends) ids in the same data.frame taking into account different followers or friends counts for different users? How can I manage pagination?


Solution

  • After some testing, I wrote this recursive function, which includes a progress bar and gets all the follower ids of a user:

    # Load library
    library('rtweet')
    
    # Shared top-level state:
    f <- vector("list", 0L) # empty list reserved for batches of follower user_ids
    ids <- 75000 # ids one token may still request in the current 15-minute window
    
    # Collect the follower ids of a user in batches, pausing whenever the
    # per-window id quota is used up.
    #
    # Arguments:
    #   userId    - user passed to get_followers() as `user`.
    #   followers - number of follower ids still to fetch (a single number).
    #   page      - cursor passed to get_followers() as `page`; the first
    #               request is made with '-1'.
    #
    # Returns a list with one element per request, in request order.
    #
    # The remaining quota lives in the global `ids` so that it carries over
    # between calls. Batches are accumulated locally, so a call that is
    # interrupted by an error cannot leak partial results into the next one.
    GetFollowersRecursivePagination <- function(userId, followers, page) {
      # Fail before any request is made if the follower count is unusable.
      stopifnot(is.numeric(followers), length(followers) == 1L,
                !is.na(followers))

      quota <- 75000 # value `ids` is reset to after each wait

      # Sleep through one 15-minute window (with a progress bar), then
      # restore the global quota.
      wait_for_quota <- function() {
        message("Waiting 15 mins...")
        total <- 15 * 60 # Total time = 15 min ~ 900 sec
        pb <- txtProgressBar(min = 0, max = total, style = 3)
        for (i in seq_len(total)) {
          Sys.sleep(time = 1) # 1 second interval
          setTxtProgressBar(pb, i)
        }
        close(pb)

        # NOTE(review): row 38 is presumably the followers/ids entry of the
        # rate-limit table; its position may differ between rtweet
        # versions - confirm.
        reset <- rate_limit(token = NULL)[38, ]$reset
        # isTRUE() keeps a missing or empty value from aborting the run
        # after the long wait; in that case the extra pause is taken.
        if (!isTRUE(reset > 14.9)) {
          message("Waiting 15 seconds more...")
          Sys.sleep(time = 15)
        }

        message("Go!")
        ids <<- quota
      }

      # Recursive worker: fetch one batch, then recurse for the remainder.
      fetch_batches <- function(remaining, cursor, collected) {
        if (ids == 0) {
          wait_for_quota()
        }

        # The last batch is the one that fits in what is left of the quota.
        last_batch <- remaining <= ids
        n_request <- if (last_batch) remaining else ids
        label <- if (last_batch) {
          "Followers < ids | Number of Followers: "
        } else {
          "Followers > ids | Number of Followers: "
        }
        message(paste0(label, remaining, " | Number of resting ids: ", ids))

        batch <- get_followers(user = userId, n = n_request, page = cursor)

        # The first page is stored as returned; later pages are wrapped in
        # a plain data.frame.
        entry <- if (cursor == "-1") batch else data.frame(user_id = batch)
        collected <- append(collected, list(entry))

        if (last_batch) {
          ids <<- ids - remaining
          message("Finished!")
          return(collected)
        }

        # The quota is now exhausted: remember where this batch stopped,
        # wait for the next window, then continue with what is left.
        next_page <- next_cursor(batch)
        wait_for_quota()
        fetch_batches(remaining - n_request, next_page, collected)
      }

      fetch_batches(followers, page, list())
    }
    
    # Example run for an account with more than 75000 followers:
    # look the account up, then page through all of its follower ids.
    user1 <- lookup_users(users = "146620155")
    FAOClimate <- GetFollowersRecursivePagination(
      userId = user1[["user_id"]],
      followers = user1[["followers_count"]],
      page = "-1"
    )
    
    # Output:
    Followers > ids | Number of Followers: 87208 | Number of resting ids:  75000
    Followers < ids | Number of Followers: 12208 | Number of resting ids: 62792
    Finished!
    
    str(FAOClimate)
    
    List of 2
     $ :'data.frame':   75000 obs. of  1 variable:
      ..$ user_id: chr [1:75000] "87189802" "884863013928546304" "886995444512964608" "852940633652301824" ...
      ..- attr(*, "next_cursor")= num 1.45e+18
     $ :'data.frame':   12207 obs. of  1 variable:
      ..$ user_id: chr [1:12207] "2175314977" "2168898233" "1491745484" "2175065456" ...
    

    Note: change the function get_followers to get_friends if you want friends instead of followers.