Search code examples
rjsondataframedplyrtidyverse

Construct JSON column from R dataframe columns


Here is my dataframe:

df <- data.frame(item = c("Box 1", "Tape", "Roll 1"), Length = c(2, 
 10, 6), Width = c(4,3,3), Height  = c(6, NA, NA), Length_units =
 c("in", "ft", "yd"), option_1 = c("item_length", "item_length",
 "item_length"), option_2 = c("item_width", "item_width",
 "item_width"), option_3 = c("item_height", "color", NA), option_4 =
 c("thickness", NA, NA), width_units = c("in", "in", "in"),
 height_units = c("in", "in", NA), color = c(NA, "clear", NA),
 thickness = c( "200#", NA, NA ))
    item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200# 
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>

I want this JSON column below attached to the end of df. Notice the option labels in JSON are according the df option columns and the JSON is dynamic according to the option columns.

{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
{"dimensions":"10 ft x 3 in","color":"clear"}
{"dimensions":"6 yd x 3 in"}

I would greatly appreciate a dplyr solution.

Edited: I want my dataframe to look like this

    item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness options_json
1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200# {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"} 
2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA> {"dimensions":"10 ft x 3 in","color":"clear"}
3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA> {"dimensions":"6 yd x 3 in"}

Solution

  • We may consider to paste/unite the columns and then convert to JSON with toJSON - as the columns have different case, it may be be better to standardize by converting to lower case (tolower - it becomes easier to get the corresponding column value from the _units column). Loop across the columns (length:height), paste (str_c), the corresponding '_units' column values when the value in the column is non-NA (using case_when), then unite those column to a single column, select the columns of interest and convert to JSON (toJSON)

    library(dplyr)
    library(stringr)
    library(jsonlite)
    library(tidyr)
    out <- df %>% 
     rename_with(tolower, everything()) %>%
      mutate(across(length:height, 
       ~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")), 
          sep = " ")))) %>%
      unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>% 
      select(dimensions, color, thickness) %>%
      toJSON(pretty = TRUE)
    

    -output

    out
    [
      {
        "dimensions": "2 in x 4 in x 6 in",
        "thickness": "200#"
      },
      {
        "dimensions": "10 ft x 3 in",
        "color": "clear"
      },
      {
        "dimensions": "6 yd x 3 in"
      }
    ] 
    

    If we want to create a column, use mutate with rowwise

    df <- df %>% 
     rename_with(tolower, everything()) %>%
      mutate(across(length:height, 
       ~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")), 
          sep = " ")))) %>%
      unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>% 
      select(dimensions, color, thickness) %>% 
      rowwise %>%
      transmute(options_json = toJSON(cur_data())) %>%
      ungroup %>%
      bind_cols(df, .)
    

    -output

    df
      item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
    1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
    2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
    3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                                  options_json
    1 [{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}]
    2          [{"dimensions":"10 ft x 3 in","color":"clear"}]
    3                           [{"dimensions":"6 yd x 3 in"}]
    

    The json format includes the opening/closing square brackets. We can remove it with str_remove

    df <- df %>% 
     rename_with(tolower, everything()) %>%
      mutate(across(length:height, 
       ~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")), 
          sep = " ")))) %>%
      unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>% 
      select(dimensions, color, thickness) %>%
     rowwise %>% 
     transmute(options_json = str_remove_all(toJSON(cur_data()), "\\[|\\]")) %>% 
     ungroup %>%
      bind_cols(df, .)
    

    -output

      item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
    1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
    2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
    3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                                options_json
    1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
    2          {"dimensions":"10 ft x 3 in","color":"clear"}
    3                           {"dimensions":"6 yd x 3 in"}
    

    Or may unclass after rowwise

    df <- df %>% 
     rename_with(tolower, everything()) %>%
      mutate(across(length:height, 
       ~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")), 
          sep = " ")))) %>%
      unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>% 
      select(dimensions, color, thickness) %>% 
      rowwise %>%
      transmute(options_json = toJSON(keep(unclass(cur_data()), 
         complete.cases), auto_unbox = TRUE)) %>% 
      ungroup %>% 
      bind_cols(df, .)
    

    -output

    > df
      item Length Width Height Length_units    option_1   option_2    option_3  option_4 width_units height_units color thickness
    1  Box 1      2     4      6           in item_length item_width item_height thickness          in           in  <NA>      200#
    2   Tape     10     3     NA           ft item_length item_width       color      <NA>          in           in clear      <NA>
    3 Roll 1      6     3     NA           yd item_length item_width        <NA>      <NA>          in         <NA>  <NA>      <NA>
                                                options_json
    1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
    2          {"dimensions":"10 ft x 3 in","color":"clear"}
    3                           {"dimensions":"6 yd x 3 in"}
    
    > str(df)
    'data.frame':   3 obs. of  14 variables:
     $ item        : chr  "Box 1" "Tape" "Roll 1"
     $ Length      : num  2 10 6
     $ Width       : num  4 3 3
     $ Height      : num  6 NA NA
     $ Length_units: chr  "in" "ft" "yd"
     $ option_1    : chr  "item_length" "item_length" "item_length"
     $ option_2    : chr  "item_width" "item_width" "item_width"
     $ option_3    : chr  "item_height" "color" NA
     $ option_4    : chr  "thickness" NA NA
     $ width_units : chr  "in" "in" "in"
     $ height_units: chr  "in" "in" NA
     $ color       : chr  NA "clear" NA
     $ thickness   : chr  "200#" NA NA
     $ options_json: 'json' chr  "{\"dimensions\":\"2 in x 4 in x 6 in\",\"thickness\":\"200#\"}" "{\"dimensions\":\"10 ft x 3 in\",\"color\":\"clear\"}" "{\"dimensions\":\"6 yd x 3 in\"}"