Search code examples
r dataframe lapply sapply do.call

Extract data from a nested list and return a data.frame


dput(head(z2,10))

structure(list(name = list("Mary"), department = structure(list(
    name = list("English")), .Names = "name", id = "300"), department = structure(list(
    name = list("Math")), .Names = "name", id = "301"), 
    department = structure(list(name = list("Chinese")), .Names = "name", id = "302f"), 
    department = structure(list(name = list("German")), .Names = "name", id = "302"), 
    department = structure(list(name = list("German")), .Names = "name", id = "302f"), 
    department = structure(list(name = list("Music")), .Names = "name", id = "303"), 
    department = structure(list(name = list("Sport")), .Names = "name", id = "305"), 
    department = structure(list(name = list("Chemistry")), .Names = "name", id = "306"), 
    department = structure(list(name = list("Science")), .Names = "name", id = "308")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department"))

I'm trying to extract information from a nested list and put it into a data.frame. From my previous post, I learned that you can use `do.call` to combine the extracted pieces, and I want the final output to be a data frame.

Here's the code from the answer in that post:

# Question's attempt, kept as posted: apply an extractor to every top-level
# element of z2 and row-bind the per-element data frames.
# NOTE(review): the result is assigned to `c`, the same name as base::c().
c <- do.call(rbind,             
        lapply(unname(z2), 
               function(x) { 
                 # Flatten this element into one atomic vector; nested element
                 # names are joined with "." by unlist().
                 temp <- unlist(x) 
                 # First column is the logical comparison itself, not values
                 # taken from temp.
                 data.frame(names(temp) == "name",
                            # Zero-length when no entry of temp is named
                            # "department.name"; a length mismatch with the
                            # other columns is what data.frame() reports as
                            # "arguments imply differing number of rows".
                            temp[names(temp) == "department.name"], 
                            # "id" attribute of each child of x, flattened.
                            unlist(sapply(x, attr, "id")), 
                            row.names=NULL) 
               })) 

Error in data.frame(names(temp) == "name", temp[names(temp) == "department.name"], :
arguments imply differing number of rows: 1, 0


new data:

structure(list(code = list("1"), note = list("success"), category = structure(list(
    name = list("Mary"), department = structure(list(name = list(
        "Math")), .Names = "name", id = "300"), department = structure(list(
        name = list("English")), .Names = "name", id = "301"), 
    department = structure(list(name = list("Chinese")), .Names = "name", id = "302f"), 
    department = structure(list(name = list("Music")), .Names = "name", id = "317")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department")), category = structure(list(
    name = list("Kevin"), department = structure(list(name = list(
        "Physics")), .Names = "name", id = "12G0"), department = structure(list(
        name = list("German")), .Names = "name", id = "321"), 
    department = structure(list(name = list("French")), .Names = "name", id = "325"), 
    department = structure(list(name = list("Spanish")), .Names = "name", id = "427")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department", "department")), category = structure(list(
    name = list("Andy"), department = structure(list(name = list(
        "Swedish")), .Names = "name", id = "330"), department = structure(list(
        name = list("Danish")), .Names = "name", id = "331"), 
    department = structure(list(name = list("Russian")), .Names = "name", id = "332"), 
    department = structure(list(name = list("Japanese")), .Names = "name", id = "341")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department")), category = structure(list(
    name = list("Nana"), department = structure(list(name = list(
        "Arabic")), .Names = "name", id = "200"), department = structure(list(
        name = list("African")), .Names = "name", id = "201"), 
    department = structure(list(name = list("Sport")), .Names = "name", id = "202"), 
    department = structure(list(name = list("Korean")), .Names = "name", id = "211")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department")), category = structure(list(name = list("Sandy"), 
    department = structure(list(name = list("Vocals")), .Names = "name", id = "100"), 
    department = structure(list(name = list("Language")), .Names = "name", id = "515")), .Names = c("name", 
"department", "department", "department", "department", "department", "department", 
"department", "department", "department", "department", "department", "department"
))), .Names = c("code", "note", "category", "category", "category", 
"category", "category"))

Solution

  • Edited answer based on the OP's new sample, which contains nested lists that each represent one user. A modified version of the dataset is reproduced below, because the original listed more names than elements in each category, which didn't make sense.

    Data:

    # Sample response: `code` and `note` fields followed by one `category`
    # entry per person. Each category holds the person's name and then one
    # `department` entry per department, with the department id stored as an
    # "id" attribute on that entry.
    z2 <- local({
      # list(name = list(<label>)) tagged with its "id" attribute.
      make_department <- function(label, id) {
        structure(list(name = list(label)), id = id)
      }

      # Person name first, then the departments, all stored under the repeated
      # element name "department". `labels` and `ids` are parallel vectors.
      make_category <- function(person, labels, ids) {
        departments <- lapply(seq_along(labels), function(i) {
          make_department(labels[[i]], ids[[i]])
        })
        names(departments) <- rep("department", length(departments))
        c(list(name = list(person)), departments)
      }

      categories <- list(
        make_category("Mary",
                      c("Math", "English", "Chinese", "Music"),
                      c("300", "301", "302f", "317")),
        make_category("Kevin",
                      c("Physics", "German", "French", "Spanish"),
                      c("12G0", "321", "325", "427")),
        make_category("Andy",
                      c("Swedish", "Danish", "Russian", "Japanese"),
                      c("330", "331", "332", "341")),
        make_category("Nana",
                      c("Arabic", "African", "Sport", "Korean"),
                      c("200", "201", "202", "211")),
        make_category("Sandy",
                      c("Vocals", "Language"),
                      c("100", "515"))
      )
      names(categories) <- rep("category", length(categories))

      c(list(code = list("1"), note = list("success")), categories)
    })
    

    Drop unneeded elements from the list:

    # Retain just the entries named "category"; code, note and anything else
    # at the top level of the list is discarded.
    z2 <- z2[names(z2) %in% "category"]
    

    Convert each nested list into a data frame & bind them together:

    # Build one data frame per category (one row per department) and stack
    # them. Columns: `name` (the person), `department` (department label) and
    # `id` (the department's "id" attribute). All columns are character.
    do.call(rbind,
            lapply(unname(z2),
                   function(y) {
                     # unname() so the repeated "department" element names
                     # never leak into the row names of the result.
                     departments <- unname(y[names(y) %in% "department"])
                     # vapply() pins the result to character even when a
                     # category has no departments; sapply() would return an
                     # empty list there and break data.frame().
                     department <- vapply(
                       departments,
                       function(x) as.character(x[[1]][[1]]),
                       character(1)
                     )
                     id <- vapply(
                       departments,
                       function(x) {
                         # exact = TRUE: do not partial-match other attributes
                         value <- attr(x, "id", exact = TRUE)
                         # A department without an id yields NA, not an error
                         if (is.null(value)) NA_character_ else as.character(value)
                       },
                       character(1)
                     )
                     data.frame(
                       # Repeated explicitly so that zero departments gives a
                       # zero-row frame instead of a length-mismatch error.
                       name = rep(y[["name"]][[1]], length(departments)),
                       department = department,
                       id = id,
                       # Same column types regardless of the R version default
                       stringsAsFactors = FALSE
                     )
                   }))
    
        name department   id
    1   Mary       Math  300
    2   Mary    English  301
    3   Mary    Chinese 302f
    4   Mary      Music  317
    5  Kevin    Physics 12G0
    6  Kevin     German  321
    7  Kevin     French  325
    8  Kevin    Spanish  427
    9   Andy    Swedish  330
    10  Andy     Danish  331
    11  Andy    Russian  332
    12  Andy   Japanese  341
    13  Nana     Arabic  200
    14  Nana    African  201
    15  Nana      Sport  202
    16  Nana     Korean  211
    17 Sandy     Vocals  100
    18 Sandy   Language  515