Search code examples
rjsondataframenested

Merge multiple API JSON (nested) data to a dataframe in R


I want to merge data from multiple API calls into one dataframe (api resource is the employee ID). The goal is to create a new dataframe (or add new attributes to existing retrieved employees dataframe). For example the dataframe will have all the attributes from employees + the attributes department, top level department, rooms.

Here are my 3 tries that dont work:

library(jsonlite)
library(httr)
library(dplyr)

# set API endpoint and parameters
api_endpoint <- "https://at.tuke.sk/api/employee"
params <- list(query = "data", type = "json")

# make API request
response <- GET(api_endpoint, query = params)

# convert response to JSON format
employees <- fromJSON(content(response, "text"), flatten = TRUE)
employees <- employees[!duplicated(employees$idEmployee), ] 


# list of employee ids
employee_ids <- employees[[1]]

for(i in 1:length(employee_ids)) {
  
  # Build the API URL with the new county code
  API_URL <- paste0('https://at.tuke.sk/api/employee/', employee_ids[i])
  
  # Store the raw and processed API results in temporary objects
  temp_raw <- GET(API_URL)
  temp_list <- fromJSON(rawToChar(temp_raw$content), flatten = TRUE)
  
  library(plyr)
  #employee_df <- rbind.fill(employees, temp_list)
  #employee_df <- data.table::rbindlist(temp_list, fill = TRUE)

  #employee_df <- bind_rows(employees, temp_list)
  
}


###############################################################
rf <- GET("https://at.tuke.sk/api/employee/")
rfc <- content(rf)

json_file <- sapply(rfc, function(x) {
  x[sapply(x, is.null)] <- NA
  unlist(x)
  as.data.frame(t(x))
})

library(data.table)
data.table::rbindlist(json_file, fill= TRUE)
##########################################################################

library(httr)
library(jsonlite)
# list of employee ids
#employee_ids <- c(77824, 6089, 6741, 1628, 9351, 16195, 9238, 9282, 6084, 77604)
employee_ids <- employees[[1]]
# create a data frame to store employee information
employee_df <- data.frame(
  id = integer(),
  first_name = character(),
  last_name = character(),
  email = character(),
  phone = character(),
  department = character(),
  stringsAsFactors = FALSE
)
# loop through employee ids
for (id in employee_ids) {
  # set API endpoint and parameters
  api_endpoint <- paste0("https://at.tuke.sk/api/employee/", id)
  params <- list(query = "data", type = "json")
  # make API request
  response <-
    try(GET(api_endpoint, query = params, content_type("application/json")), silent = TRUE)
  # convert response to JSON format
  employee <-
    try(fromJSON(content(response, "text"), flatten = TRUE), silent = TRUE)
  # extract required fields and add to dataframe
  if (inherits(employee, "list") && !is.null(employee$department$name)) {
    employee_info <- data.frame(
      id = employee$employee$idEmployee,
      first_name = employee$employee$firstName,
      last_name = employee$employee$lastName,
      email = employee$employee$email,
      phone = employee$employee$phone,
      department = employee$department$name,
      stringsAsFactors = FALSE
    )
    employee_df <- rbind(employee_df, employee_info)
  }
}

Solution

  • Here's one way to flatten those responses, following example requests details for the first 5 id-s:

    library(jsonlite)
    library(purrr)
    library(dplyr)
    library(tidyr)
    
    # main list
    empl_main <- fromJSON("https://at.tuke.sk/api/employee?query=data&type=json") %>% 
      as_tibble()
    
    # details for first 5 id-s
    empl_details <- map(empl_main$idEmployee[1:5], 
                        \(idE) fromJSON(paste0("https://at.tuke.sk/api/employee/",idE), flatten = TRUE)) %>% 
      tibble(empl = .) %>% 
      unnest_wider(empl) %>% 
      # from 'employee' hoist only 'idEmployee' for joining, drop the rest
      hoist ("employee", "idEmployee") %>% 
      select (-employee) %>% 
      # unnest 'department', 'topLevelDepartment', 'rooms'
      unnest_wider(c(department, topLevelDepartment, rooms), names_sep ="_")
    
    # join 2 tibbles
    empl_j <- left_join(empl_main, empl_details)
    #> Joining with `by = join_by(idEmployee, publonsUrl)`
    

    Result:

    # 1443 x 75 tibble, includes flattened details for first 5 id-s
    
    glimpse(empl_j)
    #> Rows: 1,443
    #> Columns: 75
    #> $ idEmployee                                <int> 77824, 6089, 6741, 1628, 935…
    #> $ firstName                                 <chr> "Tomáš", "Norbert", "Františ…
    #> $ lastName                                  <chr> "Adam", "Ádám", "Adamčík", "…
    #> $ titleBefore                               <chr> "Ing.", "doc. Ing.", "prof. …
    #> $ titleAfter                                <chr> NA, "PhD.", "CSc.", "PhD.", …
    #> $ email                                     <chr> "tomas.ada....ke.sk", "norbe…
    #> $ phone                                     <chr> "+401 55 602...6", "+421 55 …
    #> $ web                                       <lgl> NA, NA, NA, NA, NA, NA, NA, …
    #> $ login                                     <chr> "ta...kn", "an...wq", "fa...…
    #> $ employmentType                            <chr> "D", "P", "P", "P", "N", "D"…
    #> $ idDepartment                              <int> 104004, 104009, 109003, 1051…
    #> $ idRoom                                    <int> 481, 249, 3990, 37, 2086, 19…
    #> $ idRoomEmployeePosition                    <int> 3358, 1830, 3189, 1609, 1960…
    #> $ position                                  <chr> "doktorand", "docent", "prof…
    #> $ positionEn                                <chr> "PhD Student", "Associate Pr…
    #> $ color                                     <chr> "#EAAA00", "#EAAA00", "#41B6…
    #> $ publonsUrl                                <chr> NA, NA, NA, NA, NA, NA, NA, …
    #> $ fullName                                  <chr> "Ing. Tomáš Adam", "doc. Ing…
    #> $ orcidUrl                                  <chr> NA, NA, "https://orcid.org/0…
    #> $ department_idDepartment                   <int> 104004, NA, 109003, NA, 1901…
    #> $ department_acronym                        <chr> "KKUI", NA, "KA", NA, "ÚVVaD…
    #> $ department_name                           <chr> "Katedra kybernetiky a umele…
    #> $ department_nameEn                         <chr> "Department of Cybernetics a…
    #> $ department_color                          <chr> "#EAAA00", NA, "#41B6E6", NA…
    #> $ department_web                            <chr> "https://kkui.fei.tuke.sk", …
    #> $ topLevelDepartment_idDepartment           <int> 1040, NA, 1090, NA, 1900, NA…
    #> $ topLevelDepartment_acronym                <chr> "FEI", NA, "LF", NA, "R", NA…
    #> $ topLevelDepartment_name                   <chr> "Fakulta elektrotechniky a i…
    #> $ topLevelDepartment_nameEn                 <chr> "Faculty of Electrical Engin…
    #> $ topLevelDepartment_color                  <chr> "#EAAA00", NA, "#41B6E6", NA…
    #> $ topLevelDepartment_web                    <chr> "http://fei.tuke.sk", NA, "h…
    #> $ rooms_idRoomEmployeePosition              <int> 3358, NA, 3189, NA, 1960, NA…
    #> $ rooms_idRoom                              <int> 481, NA, 3990, NA, 2086, NA,…
    #> $ rooms_idEmployee                          <int> 77824, NA, 6741, NA, 9351, N…
    #> $ rooms_idEmployeePosition                  <int> 29, NA, 23, NA, 131, NA, NA,…
    #> $ rooms_orderOnLabel                        <int> 1, NA, NA, NA, 1, NA, NA, NA…
    #> $ rooms_idDepartment                        <int> 104004, NA, 109003, NA, 1901…
    #> $ rooms_room.idRoom                         <int> 481, NA, 3990, NA, 2086, NA,…
    #> $ rooms_room.idRoomType                     <int> 4, NA, 4, NA, 4, NA, NA, NA,…
    #> $ rooms_room.idBuilding                     <int> 1, NA, 153, NA, 20, NA, NA, …
    #> $ rooms_room.number                         <chr> "154", NA, "047", NA, "A119"…
    #> $ rooms_room.name                           <chr> NA, NA, NA, NA, "Referát dom…
    #> $ rooms_room.nameEn                         <lgl> NA, NA, NA, NA, NA, NA, NA, …
    #> $ rooms_room.level                          <chr> "1. poschodie", NA, "Prízemi…
    #> $ rooms_room.roomType.idRoomType            <int> 4, NA, 4, NA, 4, NA, NA, NA,…
    #> $ rooms_room.roomType.name                  <chr> "Kancelária", NA, "Kancelári…
    #> $ rooms_room.roomType.nameEn                <chr> "Office", NA, "Office", NA, …
    #> $ rooms_room.roomType.pictogram             <lgl> NA, NA, NA, NA, NA, NA, NA, …
    #> $ rooms_room.building.idBuilding            <int> 1, NA, 153, NA, 20, NA, NA, …
    #> $ rooms_room.building.name                  <chr> "V4", NA, "R7-16", NA, "L9",…
    #> $ rooms_room.building.address               <chr> "Vysokoškolská 4", NA, "Ramp…
    #> $ rooms_room.building.latitude              <dbl> 48.73419, NA, NA, NA, 48.730…
    #> $ rooms_room.building.longitude             <dbl> 21.24496, NA, NA, NA, 21.245…
    #> $ rooms_room.building.levels                <lgl> NA, NA, NA, NA, NA, NA, NA, …
    #> $ rooms_employee.idEmployee                 <int> 77824, NA, 6741, NA, 9351, N…
    #> $ rooms_employee.firstName                  <chr> "Tomáš", NA, "František", NA…
    #> $ rooms_employee.lastName                   <chr> "Adam", NA, "Adamčík", NA, "…
    #> $ rooms_employee.titleBefore                <chr> "Ing.", NA, "prof. Ing.", NA…
    #> $ rooms_employee.titleAfter                 <chr> NA, NA, "CSc.", NA, "PhD.", …
    #> $ rooms_employee.email                      <chr> "tomas.ad....uke.sk", NA, "f…
    #> $ rooms_employee.phone                      <chr> "+4.1 55 602...6", NA, "+421…
    #> $ rooms_employee.web                        <lgl> NA, NA, NA, NA, NA, NA, NA, …
    #> $ rooms_employee.login                      <chr> "ta857kn", NA, "fa777fa", NA…
    #> $ rooms_employee.employmentType             <chr> "D", NA, "P", NA, "N", NA, N…
    #> $ rooms_employee.idDepartment               <int> 104004, NA, 109003, NA, 1901…
    #> $ rooms_employee.fullName                   <chr> "Ing. Tomáš Adam", NA, "prof…
    #> $ rooms_employeePosition.idEmployeePosition <int> 29, NA, 23, NA, 131, NA, NA,…
    #> $ rooms_employeePosition.name               <chr> "doktorand", NA, "profesor",…
    #> $ rooms_employeePosition.nameEn             <chr> "PhD Student", NA, "Professo…
    #> $ rooms_department.idDepartment             <int> 104004, NA, 109003, NA, 1901…
    #> $ rooms_department.acronym                  <chr> "KKUI", NA, "KA", NA, "ÚVVaD…
    #> $ rooms_department.name                     <chr> "Katedra kybernetiky a umele…
    #> $ rooms_department.nameEn                   <chr> "Department of Cybernetics a…
    #> $ rooms_department.color                    <chr> "#EAAA00", NA, "#41B6E6", NA…
    #> $ rooms_department.web                      <chr> "https://kkui.fei.tuke.sk", …
    

    Created on 2023-03-21 with reprex v2.0.2