Search code examples
rsqldf

How to troubleshoot no such column error sqldf in r


I'm getting an error (no such column: cs7.Start) while executing a Where clause using sqldf in r. I have looked at the other historical post and tested few things (single quote, underscore) but of no luck. Any help appreciated.

dat <- structure(list(TIMESTAMP = c("2021-06-18 11:36:00", "2021-06-18 11:38:00", 
"2021-06-18 11:40:00", "2021-06-18 11:42:00", "2021-06-18 11:44:00", 
"2021-06-18 11:46:00"), VWC_CS7 = c(0.046, 0.044, 0.044, 0.044, 
0.044, 0.045), row_id = 1:6), row.names = c(NA, 6L), class = "data.frame")

cs7 <- structure(list(Serial_Number = 1:6, Start = c(101L, 634L, 797L, 
1434L, 2089L, 2490L), End = c(634L, 797L, 1434L, 2089L, 2490L, 
4204L)), row.names = c(NA, 6L), class = "data.frame")

library(sqldf)
new_df <- sqldf("select * from dat Where dat.row_id = cs7.Start and dat.row_id=cs7.End")

Solution

  • What you are trying to do is a inner join. To make it work you need to explicitly join the two tables or to include both tables in the select statement's from clause.

    dat <- structure(list(TIMESTAMP = c("2021-06-18 11:36:00", "2021-06-18 11:38:00", 
                                        "2021-06-18 11:40:00", "2021-06-18 11:42:00", 
                                        "2021-06-18 11:44:00", "2021-06-18 11:46:00"), 
                          VWC_CS7 = c(0.046, 0.044, 0.044, 0.044, 0.044, 0.045), 
                          row_id = 1:6), 
                     row.names = c(NA, 6L), class = "data.frame")
    
    cs7 <- structure(list(Serial_Number = 1:6, 
                          Start = c(101L, 634L, 797L, 1434L, 2089L, 2490L), 
                          End = c(634L, 797L, 1434L, 2089L, 2490L, 4204L)), 
                     row.names = c(NA, 6L), class = "data.frame")
    
    library(sqldf)
    #> Loading required package: gsubfn
    #> Loading required package: proto
    #> Loading required package: RSQLite
    
    sql <- 
    "select dat.TIMESTAMP, dat.VWC_CS7, dat.row_id 
      from dat
      inner join cs7
      on dat.row_id = cs7.Start and
         dat.row_id = cs7.End"
    
    new_df <- sqldf(sql)
    new_df
    #> [1] TIMESTAMP VWC_CS7   row_id   
    #> <0 rows> (or 0-length row.names)
    
    sql2 <- 
      "select dat.TIMESTAMP, dat.VWC_CS7, dat.row_id 
      from dat, cs7
      where dat.row_id = cs7.Start and
            dat.row_id = cs7.End"
    
    new_df2 <- sqldf(sql2)
    new_df2
    #> [1] TIMESTAMP VWC_CS7   row_id   
    #> <0 rows> (or 0-length row.names)
    

    Created on 2022-07-21 by the reprex package (v2.0.1)