Search code examples
mysqlrdatabasermysqlr-dbi

How to get all mysql databases into r


I want to get a few hundred separate databases into r from mysql. I can get them in separately but I do not know how to make a loop/function/apply to get all of them in at once.

Here is how I get them in separately.

library(RMySQL)
mydb = dbConnect(MySQL(), user='root', password='nelson', host='localhost', dbname="bookstore")                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       
dub4_4_16 <- dbSendQuery(mydb, "select * from dub" )
dub4_4_16 = fetch(dub4_4_16, n=-1)

reg4_4_16 <- dbSendQuery(mydb, "select * from all_sorted")
reg4_4_16 = fetch(reg4_4_16, n=-1)

dub4_5_16 <- dbSendQuery(mydb, "select * from dub4_5")
dub4_5_16 = fetch(dub4_5_16, n=-1)

Solution

  • I use a list of functions like the ones below (based on functions taken from the tcpl package) to work in the RMySQL environment. The functions are just wrappers to DBI calls. This is a very long answer, but the gist of the approach is to create the query strings, the apply over the queries to load the data. The result (results) gives a list of data.table objects containing each table from each database in your connection.

    library(data.table)
    library(RMySQL)
    
    ##----------------------------------------------------------------------------##
    ## Accessory functions
    ##----------------------------------------------------------------------------##
    
    getQuery <- function(query, db, user = "dayne", 
                         pass = "password", host = "localhost") {
      ## Change the default values as necessary, or just pass your values
      ## each time the function is called. Check out the original to see
      ## how it finds global variables set by the package.
    
      dbcon <- dbConnect(drv = RMySQL::MySQL(),
                         user = user,
                         password = pass,
                         host = host,
                         dbname = db)
      result <- dbGetQuery(dbcon, query)
      dbDisconnect(dbcon)
      result <- as.data.table(result)
      result[]
    
    }
    
    sendQuery <- function(query, db = "", user = "dayne", 
                         pass = "password", host = "localhost") {
    
      dbcon <- dbConnect(drv = RMySQL::MySQL(),
                         user = user,
                         password = pass,
                         host = host,
                         dbname = db)
      temp <- try(dbSendQuery(dbcon, query), silent = TRUE)
      if (!is(temp, "try-error")) dbClearResult(temp)
      dbDisconnect(dbcon)
    
      if (!is(temp, "try-error")) return(TRUE)
    
      temp[1]
    
    }
    
    appendTable <- function(dat, tbl, db, user = "dayne", 
                            pass = "password", host = "localhost") {
    
      dbcon <- dbConnect(drv = RMySQL::MySQL(),
                         user = user,
                         password = pass,
                         host = host,
                         dbname = db)
      dbWriteTable(conn = dbcon, 
                   name = tbl, 
                   value = dat, 
                   row.names = FALSE, 
                   append = TRUE)
    
      dbDisconnect(dbcon)
    
      return(TRUE)
    
    }
    
    ##----------------------------------------------------------------------------##
    ## Create example data
    ##----------------------------------------------------------------------------##
    
    listofdb <- c("db1", "db2", "db3")
    q1 <- paste0("CREATE DATABASE ", listofdb, ";")
    sapply(q1, sendQuery)
    listoftables <- paste0("tb", 1:5)
    q2fmt <- "CREATE TABLE %s ( val DOUBLE )"
    q2 <- sprintf(q2fmt, listoftables)
    createtables <- function(db) {
      sapply(q2, sendQuery, db = db)
      sapply(listoftables, 
             appendTable, 
             db = db, 
             dat = data.table(val = rnorm(10)))
    }
    sapply(listofdb, create tables)
    
    
    ##----------------------------------------------------------------------------##
    ## Do the work
    ##----------------------------------------------------------------------------##
    
    ## Load all of the tables form different databases
    my_db_list <- getQuery("SHOW DATABASES;", db = "")$Database
    my_db_list
    # [1] "db1" "db2" "db3"
    
    table_list <- lapply(my_db_list, getQuery, query = "SHOW TABLES;")
    names(table_list) <- my_db_list
    table_list
    # $db1
    #    Tables_in_db1
    # 1:           tb1
    # 2:           tb2
    # 3:           tb3
    # 4:           tb4
    # 5:           tb5
    # 
    # $db2
    #    Tables_in_db2
    # 1:           tb1
    # 2:           tb2
    # 3:           tb3
    # 4:           tb4
    # 5:           tb5
    # 
    # $db3
    #    Tables_in_db3
    # 1:           tb1
    # 2:           tb2
    # 3:           tb3
    # 4:           tb4
    # 5:           tb5
    
    ## The db name to the table_list data.tables and collapse
    table_list <- lapply(names(table_list), 
                         function(x) table_list[[x]][ , db := x])
    table_list <- rbindlist(table_list)
    setnames(table_list, c("tbl", "db"))
    
    ## Load all tables from all databases
    table_list[ , full_name := paste(db, tbl, sep = ".")]
    get_tables <-paste0("SELECT * FROM ", table_list$full_name, ";")
    results <- lapply(get_tables, getQuery, db = "")
    names(results) <- table_list$full_name
    
    results[["db1.tb5"]]
    #            val
    # 1:  -0.09380952
    # 2:   0.81556657
    # 3:   1.18589086
    # 4:   0.19746379
    # 5:   0.91738280
    # 6:   1.30142674
    # 7:   1.42089957
    # 8:  -0.16475130
    # 9:   0.40345353
    # 10: -1.31012033
    
    ##----------------------------------------------------------------------------##
    ## Remove example data
    ##----------------------------------------------------------------------------##
    
    cleanup <- paste0("DROP DATABASE ", listofdb, ";")
    sapply(cleanup, sendQuery)