I am new to R and am having trouble converting a JSON file to a data frame. My JSON looks like this:
# Sample input: a JSON array of four records, each with a nested "cols" object
json_file <- '[{"id": "abc", "model": "honda", "date": "20190604", "cols": {"action": 15, "values": 18, "not": 29}},
{"id": "abc", "model": "honda", "date": "20190604", "cols": {"hello": 14, "hi": 85, "wow": 14}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
{"id": "mno", "model": "ford", "date": "20190604", "cols": {"docs": 25, "ok": 87, "none": 42}}]'
I want to convert the above JSON into a data frame in the following format:
Expected Result
df =
id model date cols values_cols
abc honda 20190604 action 15
abc honda 20190604 values 18
abc honda 20190604 not 29
abc honda 20190604 hello 14
abc honda 20190604 hi 85
abc honda 20190604 wow 14
mno ford 20190604 yesterday 21
mno ford 20190604 today 21
mno ford 20190604 tomorrow 29
mno ford 20190604 docs 25
mno ford 20190604 ok 87
mno ford 20190604 none 42
My Solution
# Load RJSONIO with library() so a missing package fails immediately
# (require() would only return FALSE and let the next line fail instead).
library(RJSONIO)
# Parse the JSON string held in json_file.
df <- fromJSON(json_file)
My result
id model date cols id.1 model.1 date.1 cols.1 id.2 model.2 date.2 cols.2 id.3 model.3 date.3 cols.3
action abc honda 20190604 15 abc honda 20190604 14 mno ford 20190604 21 mno ford 20190604 25
values abc honda 20190604 18 abc honda 20190604 85 mno ford 20190604 21 mno ford 20190604 87
not abc honda 20190604 29 abc honda 20190604 14 mno ford 20190604 29 mno ford 20190604 42
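This is incorrect: the keys of `cols` (action, values, not, ...) end up as the row index, whereas they should appear as values in a `cols` column, and each record is spread across its own set of columns instead of its own rows.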
Does this do what you want?
> library(jsonlite)
> library(tidyverse)
>
> # Same four records as in the question, reflowed one field per line
> json_file <- '[
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"action": 15, "values": 18, "not": 29}},
+ {"id": "abc",
+ "model": "honda",
+ "date": "20190604",
+ "cols": {"hello": 14, "hi": 85, "wow": 14}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"yesterday": 21, "today": 21, "tomorrow": 29}},
+ {"id": "mno",
+ "model": "ford",
+ "date": "20190604",
+ "cols": {"docs": 25, "ok": 87, "none": 42}}]'
>
> # fromJSON() yields one row per JSON record; the nested "cols" objects
> # become a data-frame column (df$cols) with one column per key seen in any
> # record, holding NA where a record does not have that key.
> df <- fromJSON(json_file)
>
> # Build one tibble per record and row-bind them. Iterate over the ROWS of
> # df: seq_along(df) would count its columns (id, model, date, cols) and only
> # works when the number of records happens to equal the number of columns.
> x <- map_df(seq_len(nrow(df)),
+             ~tibble(id = df$id[.x],
+                     model = df$model[.x],
+                     date = df$date[.x],
+                     cols = names(df$cols),
+                     # row .x of the nested data frame, flattened to a vector
+                     values = unlist(df$cols[.x, ], use.names = FALSE)
+             )
+ )
>
> # Drop the key/record combinations that are absent from the input (NA values)
> x[complete.cases(x), ]
# A tibble: 12 x 5
id model date cols values
<chr> <chr> <chr> <chr> <int>
1 abc honda 20190604 action 15
2 abc honda 20190604 values 18
3 abc honda 20190604 not 29
4 abc honda 20190604 hello 14
5 abc honda 20190604 hi 85
6 abc honda 20190604 wow 14
7 mno ford 20190604 yesterday 21
8 mno ford 20190604 today 21
9 mno ford 20190604 tomorrow 29
10 mno ford 20190604 docs 25
11 mno ford 20190604 ok 87
12 mno ford 20190604 none 42
>