Search code examples
r json curly-braces jsonlite

Unnesting JSON with curly braces


I am trying to unnest some JSON data stored in curly braces. From the raw txt file, I can flatten the dataset using jsonlite as below:

library(jsonlite)
library(tidyverse)

# Read the raw text file, one element per line.
data_full <- read_lines("mytxtfile.txt")

# The questions are stored in line 1 of 6: parse that line and flatten any
# nested records into columns.
data_questions <- fromJSON(data_full[1], flatten = TRUE)

# Keep only the test-part label and the (still JSON-encoded) responses.
data_questions_simple <- data_questions[c("test_part", "responses")]

This results in a dataset with the data I actually need still nested in curly braces, as below:

data_questions_simple$responses

[1] NA                                                                                                                                                                           
[2] "{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}"                                                                                
[3] "{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}"                                                                                                  
[4] "{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}"                                               
[5] "{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}"                                                                                         
[6] "{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}"

I haven't been able to unnest the data in these curly braces - is there any way to do this that can keep the test_part label in a column next to the question responses along rows? Or would each row need to be separated into further dataframes in order to ensure the questions are associated with the appropriate test part?

The raw data is below:

[{"rt":7988.9000000059605,"stimulus":"<p>Click the button below to start the task.</p>","response":0,"trial_type":"html-button-response","trial_index":0,"time_elapsed":7991,"internal_node_id":"0.0-0.0"},{"rt":77266.30000001192,"responses":"{\"Q0\":5,\"Q1\":1,\"Q2\":2,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":4,\"Q7\":5,\"Q8\":3,\"Q9\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9]","test_part":"1","trial_type":"survey-likert-tabulate","trial_index":1,"time_elapsed":85264,"internal_node_id":"0.0-1.0-0.0"},{"rt":53696.5,"responses":"{\"Q0\":3,\"Q1\":3,\"Q2\":3,\"Q3\":2,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7]","test_part":"2","trial_type":"survey-likert-tabulate","trial_index":2,"time_elapsed":138966,"internal_node_id":"0.0-1.0-1.0"},{"rt":73241.90000000596,"responses":"{\"Q0\":4,\"Q1\":3,\"Q2\":4,\"Q3\":4,\"Q4\":3,\"Q5\":2,\"Q6\":2,\"Q7\":4,\"Q8\":3,\"Q9\":3,\"Q10\":4,\"Q11\":3,\"Q12\":\"\"}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12]","test_part":"3","trial_type":"survey-likert-tabulate","trial_index":3,"time_elapsed":212215,"internal_node_id":"0.0-1.0-2.0"},{"rt":52712.59999999404,"responses":"{\"Q0\":2,\"Q1\":2,\"Q2\":2,\"Q3\":3,\"Q4\":1,\"Q5\":2,\"Q6\":2,\"Q7\":0,\"Q8\":2}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8]","test_part":"4","trial_type":"survey-likert-tabulate","trial_index":4,"time_elapsed":264933,"internal_node_id":"0.0-1.0-3.0"},{"rt":85120.5,"responses":"{\"Q0\":2,\"Q1\":1,\"Q2\":4,\"Q3\":4,\"Q4\":4,\"Q5\":4,\"Q6\":3,\"Q7\":3,\"Q8\":4,\"Q9\":3,\"Q10\":3,\"Q11\":4,\"Q12\":4,\"Q13\":2,\"Q14\":3,\"Q15\":3,\"Q16\":2,\"Q17\":3}","text_responses":"{}","question_order":"[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17]","test_part":"5","trial_type":"survey-likert-tabulate","trial_index":5,"time_elapsed":350062,"internal_node_id":"0.0-1.0-4.0"}]

Any help is greatly appreciated!


Solution

  • You can call fromJSON a second time to parse the data nested in curly braces and store the results in a list. For instance, something like this:

    # library() stops immediately when a package is missing; require() would
    # only return FALSE and let the script fail later with an unrelated error.
    library(tidyverse)
    library(jsonlite)

    path_to_json <- "../Downloads/test_json.json"

    # Parse the outer JSON array into a plain nested list, one element per
    # record (simplifyVector = FALSE prevents collapsing into a data frame).
    json_string <- readr::read_lines(path_to_json)
    data_parsed <- jsonlite::fromJSON(json_string, simplifyVector = FALSE)

    # The "responses" field is itself a JSON-encoded string, so it has to be
    # parsed a second time. Records without that field yield NULL.
    data_responses <- map(data_parsed, function(record) {
      # [[ ]] matches the name exactly; `$` on a list allows partial matching.
      responses <- record[["responses"]]
      if (!is.null(responses) && !is.na(responses)) {
        responses <- jsonlite::fromJSON(responses, simplifyVector = FALSE)
      }
      responses
    })

    # Collect the test-part label of every record, using NA where the field
    # is absent so the result stays aligned with data_responses.
    test_parts <- map(data_parsed, function(record) {
      part <- record[["test_part"]]
      if (is.null(part) || is.na(part)) {
        part <- NA_character_
      }
      part
    })

    # Label each parsed response set with the test part it belongs to.
    names(data_responses) <- test_parts

    data_responses
    

    The checks for NULL/NA are needed to handle the first element of the parsed JSON, which does not contain fields named "responses" or "test_part".