Search code examples
sql-serverrfor-looprodbc

R Skip to Next Record if Value Already Exists in Database


I'm banging my head against a wall here trying to figure this out... I have a script that if the game_id already exists in the table I'm writing to I would like to skip to the next record in the loop

  #Connect to db
saber <- odbcConnect("sabermetrics")

  #query to pull all existing game records
check_query <- paste0("select distinct game_id from allbats")

  #build variables to compare db field and game_id in current url
game_id_check <- sqlQuery(saber, check_query)
curr_gameid <- substr(thisboxscoreURL, 66, 95)

  #if they match sktip to next record
if(game_id_check == curr_gameid) next

I've tried matrixing the data, listing, unlisting, combinations to get the data to "match", and failed miserably EVERYWHERE!

I could probably put the game_id from the url in the where clause of the query string and if the count is greater than 0 then skip to the next record as well. I'm not sure which one would be better, probably the latter because I would only return one value at most for each record in the loop.

something like

  #Connect to db
saber <- odbcConnect("sabermetrics")

  #query to pull all existing game records
check_query <- paste0("select distinct game_id from allbats where game_id = url_game_id")

if(count(sqlquery(saber, check_query)))>0 next

example data

away    home    inning  away_team_code  home_team_code  game_id

0         0       1        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       2        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       3        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       4        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         1       5        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       6        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       7        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         0       8        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1
0         x       9        sdn            sfn           gid_2016_04_26_sdnmlb_sfnmlb_1

Here is the dput, not sure it's useful though.

dput(sqlQuery(saber, "SELECT TOP 20 * FROM allbats"))
structure(list(away = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 5L, 0L, 1L, 0L, 0L, 0L), home = structure(c(1L, 
1L, 1L, 1L, 2L, 1L, 1L, 1L, 4L, 3L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("0", "1", "2", "x"), class = "factor"), 
inning = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L, 3L, 
4L, 5L, 6L, 7L, 8L, 9L, 1L, 2L), away_team_code = structure(c(3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L), .Label = c("hou", "mia", "sdn"), class = "factor"), 
home_team_code = structure(c(3L, 3L, 3L, 3L, 3L, 3L, 3L, 
3L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L), .Label = c("lan", 
"sea", "sfn"), class = "factor"), game_id = structure(c(3L, 
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 2L, 1L, 1L), .Label = c("gid_2016_04_26_houmlb_seamlb_1", 
"gid_2016_04_26_miamlb_lanmlb_1", "gid_2016_04_26_sdnmlb_sfnmlb_1"
), class = "factor")), .Names = c("away", "home", "inning", 
"away_team_code", "home_team_code", "game_id"), row.names = c(NA, 
20L), class = "data.frame")

Any help would be aprreciated!! Thanks!


Solution

  • I figured out a way to do it using the sqldf package

    saber <- odbcConnect("sabermetrics")
    
    existingGames <- paste0("select distinct game_id from linescore")
    newGames <- data.frame(gameids)
    
    existingGames <- sqlQuery(saber, existingGames)
    
    require(sqldf)
    
      # Get all new games that do not exist in existing games (linescore table)
    ngNotIneg <- sqldf('SELECT distinct * FROM newGames EXCEPT SELECT distinct * FROM existingGames')
    
    odbcClose(saber)