Search code examples
rdplyrtidyverseleft-joindbplyr

How to retain the matching column in dbplyr:::left_join.tbl_lazy from y?


In pure SQL I could do something like this:

SELECT 
    A.id,
    CASE WHEN B.id IS NOT NULL THEN NULL ELSE A.load END AS load
FROM 
    A
LEFT JOIN 
    B ON A.id = B.id;

How would I translate that to the tidyverse syntax?

library(dplyr)
library(dbplyr)
con <- simulate_mssql()
A <- tbl_lazy(data.frame(id = 1:10, load = "x"), con, name = "A")
B <- tbl_lazy(data.frame(id = 1:5), con, name = "B")

left_join(A, B, "id") ## shows only A.id

# SELECT `A`.*
# FROM `A`
# LEFT JOIN `B`
#   ON (`A`.`id` = `B`.`id`)

Is the only way to add a helper column to B like this:

left_join(A, B %>% mutate(flag = "x"), "id") %>% 
  mutate(load = if_else(flag == "x", NA_character_ load))

# <SQL>
# SELECT `id`, IIF(`flag` = 'x', NULL, `load`) AS `load`, `flag`
# FROM (
#   SELECT `A`.*, `flag`
#   FROM `A`
#   LEFT JOIN (
#     SELECT `B`.*, 'x' AS `flag`
#     FROM `B`
#   ) AS `RHS`
#     ON (`A`.`id` = `RHS`.`id`)
# ) AS `q01`

Solution

  • You can keep your B.id using keep = TRUE in the join, i.e.

    library(dplyr)
    A2 <- data.frame(id = 1:10, load = "x")
    B2 <- data.frame(id = 1:5)
    left_join(A2, B2, "id", keep = TRUE) %>%
      mutate(load = if_else(!is.na(id.y), NA_character_, load))
    #>    id.x load id.y
    #> 1     1 <NA>    1
    #> 2     2 <NA>    2
    #> 3     3 <NA>    3
    #> 4     4 <NA>    4
    #> 5     5 <NA>    5
    #> 6     6    x   NA
    #> 7     7    x   NA
    #> 8     8    x   NA
    #> 9     9    x   NA
    #> 10   10    x   NA
    
    library(dbplyr)
    con <- simulate_mssql()
    A <- tbl_lazy(data.frame(id = 1:10, load = "x"), con, name = "A")
    B <- tbl_lazy(data.frame(id = 1:5), con, name = "B")
    
    left_join(A, B, "id", keep = TRUE) %>%
      mutate(load = if_else(!is.na(id.y), NA_character_, load))
    #> <SQL>
    #> SELECT `id.x`, IIF(NOT((`id.y` IS NULL)), NULL, `load`) AS `load`, `id.y`
    #> FROM (
    #>   SELECT `A`.`id` AS `id.x`, `load`, `B`.`id` AS `id.y`
    #>   FROM `A`
    #>   LEFT JOIN `B`
    #>     ON (`A`.`id` = `B`.`id`)
    #> ) AS `q01`
    

    Created on 2024-02-22 with reprex v2.1.0

    Not sure if that helps though...