Here is my dataframe:
df <- data.frame(item = c("Box 1", "Tape", "Roll 1"), Length = c(2,
10, 6), Width = c(4,3,3), Height = c(6, NA, NA), Length_units =
c("in", "ft", "yd"), option_1 = c("item_length", "item_length",
"item_length"), option_2 = c("item_width", "item_width",
"item_width"), option_3 = c("item_height", "color", NA), option_4 =
c("thickness", NA, NA), width_units = c("in", "in", "in"),
height_units = c("in", "in", NA), color = c(NA, "clear", NA),
thickness = c( "200#", NA, NA ))
item Length Width Height Length_units option_1 option_2 option_3 option_4 width_units height_units color thickness
1 Box 1 2 4 6 in item_length item_width item_height thickness in in <NA> 200#
2 Tape 10 3 NA ft item_length item_width color <NA> in in clear <NA>
3 Roll 1 6 3 NA yd item_length item_width <NA> <NA> in <NA> <NA> <NA>
I want this JSON column below attached to the end of df. Notice the option labels in JSON are according the df option columns and the JSON is dynamic according to the option columns.
{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
{"dimensions":"10 ft x 3 in","color":"clear"}
{"dimensions":"6 yd x 3 in"}
I would greatly appreciate a dplyr solution.
Edited: I want my dataframe to look like this
item Length Width Height Length_units option_1 option_2 option_3 option_4 width_units height_units color thickness options_json
1 Box 1 2 4 6 in item_length item_width item_height thickness in in <NA> 200# {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
2 Tape 10 3 NA ft item_length item_width color <NA> in in clear <NA> {"dimensions":"10 ft x 3 in","color":"clear"}
3 Roll 1 6 3 NA yd item_length item_width <NA> <NA> in <NA> <NA> <NA> {"dimensions":"6 yd x 3 in"}
We may consider to paste/unite
the columns and then convert to JSON with toJSON
- as the columns have different case, it may be be better to standardize by converting to lower case (tolower
- it becomes easier to get
the corresponding column value from the _units
column). Loop across
the columns (length:height
), paste
(str_c
), the corresponding '_units' column values when the value in the column is non-NA (using case_when
), then unite
those column to a single column, select
the columns of interest and convert to JSON (toJSON
)
library(dplyr)
library(stringr)
library(jsonlite)
library(tidyr)
out <- df %>%
rename_with(tolower, everything()) %>%
mutate(across(length:height,
~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")),
sep = " ")))) %>%
unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
select(dimensions, color, thickness) %>%
toJSON(pretty = TRUE)
-output
out
[
{
"dimensions": "2 in x 4 in x 6 in",
"thickness": "200#"
},
{
"dimensions": "10 ft x 3 in",
"color": "clear"
},
{
"dimensions": "6 yd x 3 in"
}
]
If we want to create a column, use mutate
with rowwise
df <- df %>%
rename_with(tolower, everything()) %>%
mutate(across(length:height,
~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")),
sep = " ")))) %>%
unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
select(dimensions, color, thickness) %>%
rowwise %>%
transmute(options_json = toJSON(cur_data())) %>%
ungroup %>%
bind_cols(df, .)
-output
df
item Length Width Height Length_units option_1 option_2 option_3 option_4 width_units height_units color thickness
1 Box 1 2 4 6 in item_length item_width item_height thickness in in <NA> 200#
2 Tape 10 3 NA ft item_length item_width color <NA> in in clear <NA>
3 Roll 1 6 3 NA yd item_length item_width <NA> <NA> in <NA> <NA> <NA>
options_json
1 [{"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}]
2 [{"dimensions":"10 ft x 3 in","color":"clear"}]
3 [{"dimensions":"6 yd x 3 in"}]
The json
format includes the opening/closing square brackets. We can remove it with str_remove
df <- df %>%
rename_with(tolower, everything()) %>%
mutate(across(length:height,
~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")),
sep = " ")))) %>%
unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
select(dimensions, color, thickness) %>%
rowwise %>%
transmute(options_json = str_remove_all(toJSON(cur_data()), "\\[|\\]")) %>%
ungroup %>%
bind_cols(df, .)
-output
item Length Width Height Length_units option_1 option_2 option_3 option_4 width_units height_units color thickness
1 Box 1 2 4 6 in item_length item_width item_height thickness in in <NA> 200#
2 Tape 10 3 NA ft item_length item_width color <NA> in in clear <NA>
3 Roll 1 6 3 NA yd item_length item_width <NA> <NA> in <NA> <NA> <NA>
options_json
1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
2 {"dimensions":"10 ft x 3 in","color":"clear"}
3 {"dimensions":"6 yd x 3 in"}
Or may unclass
after rowwise
df <- df %>%
rename_with(tolower, everything()) %>%
mutate(across(length:height,
~ case_when(!is.na(.) ~ str_c(.x, get(str_c(cur_column(), "_units")),
sep = " ")))) %>%
unite(dimensions, length, width, height, sep = " x ", na.rm = TRUE) %>%
select(dimensions, color, thickness) %>%
rowwise %>%
transmute(options_json = toJSON(keep(unclass(cur_data()),
complete.cases), auto_unbox = TRUE)) %>%
ungroup %>%
bind_cols(df, .)
-output
> df
item Length Width Height Length_units option_1 option_2 option_3 option_4 width_units height_units color thickness
1 Box 1 2 4 6 in item_length item_width item_height thickness in in <NA> 200#
2 Tape 10 3 NA ft item_length item_width color <NA> in in clear <NA>
3 Roll 1 6 3 NA yd item_length item_width <NA> <NA> in <NA> <NA> <NA>
options_json
1 {"dimensions":"2 in x 4 in x 6 in","thickness":"200#"}
2 {"dimensions":"10 ft x 3 in","color":"clear"}
3 {"dimensions":"6 yd x 3 in"}
> str(df)
'data.frame': 3 obs. of 14 variables:
$ item : chr "Box 1" "Tape" "Roll 1"
$ Length : num 2 10 6
$ Width : num 4 3 3
$ Height : num 6 NA NA
$ Length_units: chr "in" "ft" "yd"
$ option_1 : chr "item_length" "item_length" "item_length"
$ option_2 : chr "item_width" "item_width" "item_width"
$ option_3 : chr "item_height" "color" NA
$ option_4 : chr "thickness" NA NA
$ width_units : chr "in" "in" "in"
$ height_units: chr "in" "in" NA
$ color : chr NA "clear" NA
$ thickness : chr "200#" NA NA
$ options_json: 'json' chr "{\"dimensions\":\"2 in x 4 in x 6 in\",\"thickness\":\"200#\"}" "{\"dimensions\":\"10 ft x 3 in\",\"color\":\"clear\"}" "{\"dimensions\":\"6 yd x 3 in\"}"