Search code examples
rdplyrrsqliter-dbidbplyr

Create variable in database using ALTER TABLE and UPDATE


I have a 50GB SQLite database file and I want to calculate and add new variables. Can you leverage Moody_Mudskipper's function or something using ALTER TABLE and UPDATE to create the variable instead of an entire table?

library(dbplyr)
    library(DBI)
    con <- DBI::dbConnect(RSQLite::SQLite(), path = ":memory:")
    copy_to(con, head(iris,3),"iris")

create <- function(data,name){
   DBI::dbSendQuery(data$src$con,
                    paste("CREATE TABLE", name,"AS", dbplyr::sql_render(data)))
                             }

tbl(con,"iris") %>% 
   mutate(Sepal.Area= Sepal.Length * Sepal.Width) %>% 
   create("iris_2")

Solution

  • Here is an easy solution using pool and DBI since you can directly write and execute any valid SQL statement.

    library(DBI)
    library(pool)
    library(RSQLite)
    
    #Database
    #Create a connection
    pool <- dbPool(drv =RSQLite::SQLite(),dbname="")
    #Colse open connection when it is no longer needed, to prevent leaks
    poolClose(pool)
    
    
    dbWriteTable(conn = pool, name = "mtcars", value = mtcars)
    
    insert_new_column <- function(table_name, column_name, column_type){
            query <- sqlInterpolate(pool,"ALTER TABLE ?table ADD COLUMN ?column ?type;",table=table_name, column=column_name, type=column_type)
    
            dbExecute(pool,query)
    
    }
    
    insert_new_column(table_name="mtcars", column_name="test", column_type="REAL")  #More types: INTEGER, TEXT
    
    dbGetQuery(pool,"Select * from mtcars;")
    
    head(dbReadTable(pool, "mtcars"))
       mpg cyl disp  hp drat    wt  qsec vs am gear carb test
    1 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4  NA
    2 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4  NA
    3 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1  NA
    4 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1  NA
    5 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2  NA
    6 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1  NA
    

    Then you can do:

    tbl(pool,"mtcars") %>% 
        mutate(test= cyl * 2)
    

    Update

    con <- dbConnect(RSQLite::SQLite(), ":memory:")
    
    dbWriteTable(con, "mtcars", mtcars)
    insert_new_column2 <- function(table_name, column_name, column_type){
          dbWithTransaction(
                con,{
                       dbExecute(con, sqlInterpolate(con,"ALTER TABLE ?table ADD COLUMN ?column ?type;",table=table_name, column=column_name, type=column_type))
    
                       dbExecute(con, sqlInterpolate(con,"UPDATE ?table SET ?column = cyl*2;",table=table_name, column=column_name))
    
      }) 
     }