Search code examples
r tidyverse purrr

Why is the purrr::map function not correctly mapping a function to each piece of a split dataframe?


I have the following dataframe that we can call df_all

structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Bovada", "Bovada", "LowVig.ag", "LowVig.ag"), Home = c("Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide"
), Away = c("San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs"), Team = c("Alabama Crimson Tide", "San Diego St Aztecs", 
"Alabama Crimson Tide", "San Diego St Aztecs"), Price = c(-110, 
-110, -111, -101), Points = c(-7.5, 7.5, -7, 7)), row.names = c(NA, 
-4L), class = c("tbl_df", "tbl", "data.frame"))

and I have the following dataframe that we can call df_alt

structure(list(ID = c("1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385", 
"1738c0c7214e7fced61c1caa479a5385", "1738c0c7214e7fced61c1caa479a5385"
), Book = c("Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", 
"Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", 
"Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", "Pinnacle", 
"Pinnacle"), Home = c("Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide"), Away = c("San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs"), Team = c("Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"Alabama Crimson Tide", "Alabama Crimson Tide", "Alabama Crimson Tide", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "San Diego St Aztecs", 
"San Diego St Aztecs", "San Diego St Aztecs", "Alabama Crimson Tide", 
"San Diego St Aztecs"), Price = c(-149, -138, -126, -115, 105, 
114, 122, 132, 128, 119, 110, 102, -119, -131, -142, -154, -104, 
-108), Points = c(-5.5, -6, -6.5, -7, -8, -8.5, -9, -9.5, 5.5, 
6, 6.5, 7, 8, 8.5, 9, 9.5, -7.5, 7.5)), row.names = c(NA, -18L
), class = c("tbl_df", "tbl", "data.frame"))

I have the following function which looks for common/intersecting Points values between df_all and df_alt.

# Intended to take one piece of df_all (as produced by group_split(ID, Book))
# and append the Pinnacle rows of df_alt whose (ID, Team, Points) also occur
# in that piece, with Price taken from df_alt.
#
# df: a tibble with columns ID, Book, Home, Away, Team, Price, Points.
# Also reads df_alt (and, as written, df_int) from outside the function.
int_value <- function(df){
    
    # NOTE(review): the result of this pipeline is never assigned to anything,
    # so the intersection is computed and then discarded.
    df %>% 
            dplyr::select(c(ID, Team, Points)) %>%  
            dplyr::intersect(df_alt %>% dplyr::select(c(ID, Team,Points))) %>% 
            mutate(Book = 'Pinnacle')
    
    # NOTE(review): df_int is not defined inside this function, so R resolves
    # it in the enclosing (presumably global) environment. If a df_int built
    # from the whole of df_all is lying around there, every piece is joined to
    # the same rows -- TODO confirm against the session that produced the output.
    # Neither join names its keys, so both join on all shared column names.
    df %>% full_join(df_int)%>% left_join(df_alt %>% rename(price=Price)) %>% 
            mutate(Price=ifelse(is.na(price),Price,price))%>% 
            select(-price)
}

I am trying to apply int_value using the following map syntax.

# Break df_all into one tibble per (ID, Book) combination, then apply
# int_value to each piece and collect the results in a list.
map(group_split(df_all, ID, Book), int_value)

This is the output that is returned which is not the desired output.

[[1]]
# A tibble: 8 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
4 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
5 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
6 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -108    7.5
7 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
8 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7  

[[2]]
# A tibble: 8 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada    Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
4 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
5 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
6 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -108    7.5
7 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
8 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7  

This is the desired output and what I expected to be returned.

[[1]]
# A tibble: 4 × 7
ID                               Book     Home                 Away                Team                 
Price Points
<chr>                            <chr>    <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -110   -7.5
2 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St Aztecs San Diego 
St Aztecs   -110    7.5
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -104   -7.5
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St Aztecs San Diego 
St Aztecs   -108    7.5


[[2]]
# A tibble: 4 × 7
ID                               Book      Home                 Away                Team                 
Price Points
<chr>                            <chr>     <chr>                <chr>               <chr>                
<dbl>  <dbl>
1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -111   -7  
2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs   -101    7  
3 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs Alabama 
Crimson Tide  -115   -7  
4 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St Aztecs San 
Diego St Aztecs    102    7 

The map function doesn't appear to be honoring the implied group_by based on the Book column. What am I missing?


Solution

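  • The solution was as @stefan had recommended. In the original function the result of the first pipeline was never assigned to df_int, so the later full_join(df_int) did not use an intersection computed from the current piece; df_int was looked up outside the function instead. After assigning df_int inside the function (and carrying Home and Away through the intersection so the joined rows are complete), the output is accurate. Here is the updated function: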

    # Append the matching Pinnacle rows from df_alt to one piece of df_all.
    #
    # df: a tibble with columns ID, Book, Home, Away, Team, Price, Points
    #     (one element of the list produced by group_split(ID, Book)).
    # Reads df_alt, which has the same columns, from the enclosing environment.
    #
    # Returns df plus one Book = 'Pinnacle' row for every
    # (ID, Home, Away, Team, Points) combination that occurs in both df and
    # df_alt; Price is taken from df_alt wherever df_alt has a matching row.
    int_value <- function(df){
        
        # Columns that identify a line independently of the book offering it.
        line_cols <- c("ID", "Home", "Away", "Team", "Points")
        # Join keys: the same six columns the implicit natural join would use.
        join_cols <- c(line_cols, "Book")
        
        # Lines present in both this piece and df_alt, relabelled as Pinnacle.
        df_int <- df %>% 
                dplyr::select(dplyr::all_of(line_cols)) %>%  
                dplyr::intersect(df_alt %>% dplyr::select(dplyr::all_of(line_cols))) %>% 
                mutate(Book = 'Pinnacle')
        
        # df_int has no Price column, so rows it contributes that have no
        # match in df come through with Price = NA.
        df_join <- df %>% full_join(df_int, by = join_cols)
        
        # Bring in df_alt's price under a temporary name and prefer it over
        # the existing Price wherever df_alt has a matching row.
        df_final <- df_join %>% 
                left_join(df_alt %>% rename(price=Price), by = join_cols) %>% 
                mutate(Price=ifelse(is.na(price),Price,price))%>% 
                select(-price)
        
        # Return visibly: ending the body on the assignment above would hand
        # the result back invisibly, so a direct call would print nothing.
        df_final
    }
    

    And here is the updated output

    [[1]]
    # A tibble: 4 × 7
    ID                               Book     Home                 Away                
    Team                 Price Points
    <chr>                            <chr>    <chr>                <chr>               
    <chr>                <dbl>  <dbl>
    1 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St 
    Aztecs Alabama Crimson Tide  -110   -7.5
    2 1738c0c7214e7fced61c1caa479a5385 Bovada   Alabama Crimson Tide San Diego St 
    Aztecs San Diego St Aztecs   -110    7.5
    3 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St 
    Aztecs Alabama Crimson Tide  -104   -7.5
    4 1738c0c7214e7fced61c1caa479a5385 Pinnacle Alabama Crimson Tide San Diego St 
    Aztecs San Diego St Aztecs   -108    7.5
    
    [[2]]
    # A tibble: 4 × 7
    ID                               Book      Home                 Away                
    Team                 Price Points
    <chr>                            <chr>     <chr>                <chr>               
    <chr>                <dbl>  <dbl>
    1 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St 
    Aztecs Alabama Crimson Tide  -111     -7
    2 1738c0c7214e7fced61c1caa479a5385 LowVig.ag Alabama Crimson Tide San Diego St 
    Aztecs San Diego St Aztecs   -101      7
    3 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St 
    Aztecs Alabama Crimson Tide  -115     -7
    4 1738c0c7214e7fced61c1caa479a5385 Pinnacle  Alabama Crimson Tide San Diego St 
    Aztecs San Diego St Aztecs    102      7