Search code examples
function dplyr stringdist

stringdist within a function


I am working in R and using the following data:

  # Sample input: three cities, each with one row for id 1 and one for id 2.
  data <- data.frame(
    id = rep(c(1, 2), times = 3),
    city = rep(c("London", "Lisbon", "Paris"), each = 2),
    name = c("jane", "janey", "jane", "janee", "lauran", "laura")
  )
  
id city name
1 London jane
2 London janey
1 Lisbon jane
2 Lisbon janee
1 Paris lauran
2 Paris laura

My function pivots the data (section 1), but I also want it to go on to compute the string distance between the pivoted columns (section 2).

# Pivot `input_data` to wide format, then add a string-distance column.
#
# Arguments:
#   input_data         - data frame to reshape; section 1 takes the new
#                        column suffixes from its `id` column.
#   measure_1          - unquoted column whose values are spread into the
#                        `<column>_<id>` columns.
#   string_dist_method - passed to stringdist() as `method`.
#
# Intended to return `data_string_dist`: the pivoted data plus a
# `string_dist_1` column.
# NOTE(review): as written, section 2 fails with "object 'name' not found"
# when the function is called with `measure_1 = name`.
pivot_and_string_dist <- function(input_data,
                                  measure_1,
                                  string_dist_method){

# section 1 - works fine
# see (1) below for output after this section
# Spreads `measure_1` into one column per `id` value, named "<value>_<id>".
data_pivoted <- input_data %>%
                pivot_wider(names_from = id,
                            names_glue =  "{.value}_{id}",
                            values_from = c({{measure_1}}))

# section 2 - does not work
# see (2) below for required output after this section
# NOTE(review): `!!{{measure_1}}` presumably forces the bare column name to be
# evaluated as an ordinary object, which would explain the reported error.
# Even if it evaluated, paste0() builds a plain string such as "name_1", not a
# reference to the pivoted column -- confirm before relying on this.
data_string_dist <- data_pivoted %>%
                    mutate(string_dist_1 = 
                                 stringdist((paste0(!!{{measure_1}}, "_1")),
                                            (paste0(!!{{measure_1}}, "_2")),
                                            method = string_dist_method,
                                            p = 0.1))
return(data_string_dist)
}

(1) output after section 1 of function - works fine

# Expected wide-format result of section 1: one row per city, with the `name`
# values for id 1 and id 2 spread into `name_1` and `name_2`.
# City names are quoted (bare symbols would be looked up as objects) and the
# arguments are comma-separated so the snippet parses.
data <- data.frame(
  city = c("London", "Lisbon", "Paris"),
  name_1 = c("jane", "jane", "lauran"),
  name_2 = c("janey", "janee", "laura"))
city name_1 name_2
London jane janey
Lisbon jane janee
Paris lauran laura

(2) required output after section 2 of function - not working

  # Call the function on the sample data with the "jw" method; this is the
  # call that raises the error shown below.
  x <- pivot_and_string_dist(
    data,
    measure_1 = name,
    string_dist_method = "jw"
  )
  
Error in quos(..., .ignore_empty = "all") : object 'name' not found

(NOTE: I am showing dummy stringdist numbers here!)

x:

city name_1 name_2 string_dist_1
London jane janey 0.4
Lisbon jane janee 0.5
Paris lauran laura 0.6

Thanks


Solution

  • Using enquo() and as_label() from the rlang package is one option:

    # Reshape `input_data` to wide format, then compare the two generated
    # `<measure>_1` / `<measure>_2` columns with stringdist().
    #
    # input_data:         data frame with an `id` column and the measure column.
    # measure_1:          unquoted column to spread and compare.
    # string_dist_method: forwarded to stringdist() as `method`.
    #
    # Returns the widened data with an added `string_dist_1` column.
    pivot_and_string_dist <- function(input_data,
                                      measure_1,
                                      string_dist_method) {
      # Capture the column argument as text so the pivoted names can be built.
      measure_label <- rlang::as_label(rlang::enquo(measure_1))
      first_col <- paste0(measure_label, "_1")
      second_col <- paste0(measure_label, "_2")

      # Section 1: spread the measure into one column per `id` value.
      wide <- pivot_wider(
        input_data,
        names_from = id,
        names_glue =  "{.value}_{id}",
        values_from = c({{ measure_1 }})
      )

      # Section 2: look the generated columns up by name via the `.data`
      # pronoun, since their names are only known as strings here.
      mutate(
        wide,
        string_dist_1 = stringdist(
          .data[[first_col]],
          .data[[second_col]],
          method = string_dist_method,
          p = 0.1
        )
      )
    }
    

    Usage and Output

    # Same call as in the question; the result is printed rather than stored.
    pivot_and_string_dist(
      data,
      measure_1 = name,
      string_dist_method = "jw"
    )
    
    # A tibble: 3 × 4
      city   name_1 name_2 string_dist_1
      <chr>  <chr>  <chr>          <dbl>
    1 London jane   janey         0.0400
    2 Lisbon jane   janee         0.0400
    3 Paris  lauran laura         0.0333