I read the data from a json file using this option
library("rjson")
result <- fromJSON(file = "input.json")
And the result is a list.
I would like to convert this list into a dataframe with colum names.
However, the list has many nested options. How is it possible to convert them into a dataframe?
Here an example of the first json element:
{ "id": 6119227, "node_id": "MDEwOlJlcG9zaXRvcnk2MTE5MjI3", "name": "sentiment", "full_name": "thisandagain/sentiment", "private": false, "owner": { "login": "thisandagain", "id": 747641, "node_id": "MDQ6VXNlcjc0NzY0MQ==", "avatar_url": "https://avatars.githubusercontent.com/u/747641?v=4", "gravatar_id": "", "url": "https://api.github.com/users/thisandagain", "html_url": "https://github.com/thisandagain", "followers_url": "https://api.github.com/users/thisandagain/followers", "following_url": "https://api.github.com/users/thisandagain/following{/other_user}", "gists_url": "https://api.github.com/users/thisandagain/gists{/gist_id}", "starred_url": "https://api.github.com/users/thisandagain/starred{/owner}{/repo}", "subscriptions_url": "https://api.github.com/users/thisandagain/subscriptions", "organizations_url": "https://api.github.com/users/thisandagain/orgs", "repos_url": "https://api.github.com/users/thisandagain/repos", "events_url": "https://api.github.com/users/thisandagain/events{/privacy}", "received_events_url": "https://api.github.com/users/thisandagain/received_events", "type": "User", "site_admin": false }, "html_url": "https://github.com/thisandagain/sentiment", "description": "AFINN-based sentiment analysis for Node.js.", "fork": false, "url": "https://api.github.com/repos/thisandagain/sentiment", "forks_url": "https://api.github.com/repos/thisandagain/sentiment/forks", "keys_url": "https://api.github.com/repos/thisandagain/sentiment/keys{/key_id}", "collaborators_url": "https://api.github.com/repos/thisandagain/sentiment/collaborators{/collaborator}", "teams_url": "https://api.github.com/repos/thisandagain/sentiment/teams", "hooks_url": "https://api.github.com/repos/thisandagain/sentiment/hooks", "issue_events_url": "https://api.github.com/repos/thisandagain/sentiment/issues/events{/number}", "events_url": "https://api.github.com/repos/thisandagain/sentiment/events", "assignees_url": "https://api.github.com/repos/thisandagain/sentiment/assignees{/user}", "branches_url": "https://api.github.com/repos/thisandagain/sentiment/branches{/branch}", "tags_url": "https://api.github.com/repos/thisandagain/sentiment/tags", "blobs_url": "https://api.github.com/repos/thisandagain/sentiment/git/blobs{/sha}", "git_tags_url": "https://api.github.com/repos/thisandagain/sentiment/git/tags{/sha}", "git_refs_url": "https://api.github.com/repos/thisandagain/sentiment/git/refs{/sha}", "trees_url": "https://api.github.com/repos/thisandagain/sentiment/git/trees{/sha}", "statuses_url": "https://api.github.com/repos/thisandagain/sentiment/statuses/{sha}", "languages_url": "https://api.github.com/repos/thisandagain/sentiment/languages", "stargazers_url": "https://api.github.com/repos/thisandagain/sentiment/stargazers", "contributors_url": "https://api.github.com/repos/thisandagain/sentiment/contributors", "subscribers_url": "https://api.github.com/repos/thisandagain/sentiment/subscribers", "subscription_url": "https://api.github.com/repos/thisandagain/sentiment/subscription", "commits_url": "https://api.github.com/repos/thisandagain/sentiment/commits{/sha}", "git_commits_url": "https://api.github.com/repos/thisandagain/sentiment/git/commits{/sha}", "comments_url": "https://api.github.com/repos/thisandagain/sentiment/comments{/number}", "issue_comment_url": "https://api.github.com/repos/thisandagain/sentiment/issues/comments{/number}", "contents_url": "https://api.github.com/repos/thisandagain/sentiment/contents/{+path}", "compare_url": "https://api.github.com/repos/thisandagain/sentiment/compare/{base}...{head}", "merges_url": "https://api.github.com/repos/thisandagain/sentiment/merges", "archive_url": "https://api.github.com/repos/thisandagain/sentiment/{archive_format}{/ref}", "downloads_url": "https://api.github.com/repos/thisandagain/sentiment/downloads", "issues_url": "https://api.github.com/repos/thisandagain/sentiment/issues{/number}", "pulls_url": "https://api.github.com/repos/thisandagain/sentiment/pulls{/number}", "milestones_url": "https://api.github.com/repos/thisandagain/sentiment/milestones{/number}", "notifications_url": "https://api.github.com/repos/thisandagain/sentiment/notifications{?since,all,participating}", "labels_url": "https://api.github.com/repos/thisandagain/sentiment/labels{/name}", "releases_url": "https://api.github.com/repos/thisandagain/sentiment/releases{/id}", "deployments_url": "https://api.github.com/repos/thisandagain/sentiment/deployments", "created_at": "2012-10-08T03:45:22Z", "updated_at": "2023-01-20T12:34:09Z", "pushed_at": "2020-05-18T13:55:58Z", "git_url": "git://github.com/thisandagain/sentiment.git", "ssh_url": "[email protected]:thisandagain/sentiment.git", "clone_url": "https://github.com/thisandagain/sentiment.git", "svn_url": "https://github.com/thisandagain/sentiment", "homepage": "", "size": 416, "stargazers_count": 2560, "watchers_count": 2560, "language": "JavaScript", "has_issues": true, "has_projects": true, "has_downloads": true, "has_wiki": false, "has_pages": false, "has_discussions": false, "forks_count": 319, "mirror_url": null, "archived": false, "disabled": false, "open_issues_count": 16, "license": { "key": "mit", "name": "MIT License", "spdx_id": "MIT", "url": "https://api.github.com/licenses/mit", "node_id": "MDc6TGljZW5zZTEz" }, "allow_forking": true, "is_template": false, "web_commit_signoff_required": false, "topics": [ "afinn", "analysis", "javascript", "nlp", "sentiment", "sentiment-analysis" ], "visibility": "public", "forks": 319, "open_issues": 16, "watchers": 2560, "default_branch": "develop", "score": 1.0 }
I have saved your_json-element_ as input.json
. With the package RJSONIO
this file is read in as json
. Your example is deeply nested json
.
This list may consist of lists within a list. For such cases the list
is flattened with str_c()
and transformed into tibble
, if its length is 1
it is directly transformed into a tibble
.
With purrr
's list_cbind()
this is all bind into one DF with - in this case - one row and 80 columns.
library(tidyverse)
library(RJSONIO)
json <- fromJSON("input.json", nullValue = NA)
dat <- lapply(json, function(j) {
if (length(j) > 1) {
j |>
stringr::str_c(collapse = ",") |>
as_tibble()
} else {
as_tibble(j)
}
})
purrr::list_cbind(dat)
# A tibble: 1 × 80
id$value node_id…¹ name$…² full_…³ priva…⁴ owner…⁵ html_…⁶ descr…⁷
<dbl> <chr> <chr> <chr> <lgl> <chr> <chr> <chr>
1 6119227 MDEwOlJl… sentim… thisan… FALSE thisan… https:… AFINN-…
# … with 72 more variables: fork <tibble[,1]>, url <tibble[,1]>,
# forks_url <tibble[,1]>, keys_url <tibble[,1]>,
# collaborators_url <tibble[,1]>, teams_url <tibble[,1]>,
# hooks_url <tibble[,1]>, issue_events_url <tibble[,1]>,
# events_url <tibble[,1]>, assignees_url <tibble[,1]>,
# branches_url <tibble[,1]>, tags_url <tibble[,1]>,
# blobs_url <tibble[,1]>, git_tags_url <tibble[,1]>, …
# ℹ Use `colnames()` to see all variable names
NEW EDIT based on comment
Here is another approach, which I have adopted from themockup
library(tidyverse)
library(jsonlite)
library(httr)
url_json <- "https://api.github.com/search/commits?q=sentiment&per_page=100&page=8"
raw_json <- url_json |>
httr::GET() |>
httr::content()
raw_json$items |> str(max.level = 1)
#> List of 100
rj <- raw_json |>
discard(is_empty) |>
map_if(is.data.frame, list) |>
as_tibble()
df <- rj |>
mutate(no = seq_along(rj$items)) |>
unnest_wider(items)
df
#> # A tibble: 100 × 14
#> total…¹ incom…² url sha node_id html_…³ comme…⁴ commit author
#> <int> <lgl> <chr> <chr> <chr> <chr> <chr> <list> <list>
#> 1 44460 TRUE http… 5233… MDY6Q2… https:… https:… <named list> <named list>
#> 2 44460 TRUE http… c27c… MDY6Q2… https:… https:… <named list> <named list>
#> 3 44460 TRUE http… 9779… MDY6Q2… https:… https:… <named list> <named list>
#> 4 44460 TRUE http… 5533… MDY6Q2… https:… https:… <named list> <named list>
#> 5 44460 TRUE http… d815… MDY6Q2… https:… https:… <named list> <named list>
#> 6 44460 TRUE http… eb12… MDY6Q2… https:… https:… <named list> <named list>
#> 7 44460 TRUE http… 70a3… MDY6Q2… https:… https:… <named list> <named list>
#> 8 44460 TRUE http… 8a72… MDY6Q2… https:… https:… <named list> <NULL>
#> 9 44460 TRUE http… 4ae3… MDY6Q2… https:… https:… <named list> <named list>
#> 10 44460 TRUE http… b324… MDY6Q2… https:… https:… <named list> <NULL>
#> # … with 90 more rows, 5 more variables: committer <list>, parents <list>,
#> # repository <list>, score <dbl>, no <int>, and abbreviated variable names
#> # ¹total_count, ²incomplete_results, ³html_url, ⁴comments_url
glimpse(df)
#> Rows: 100
#> Columns: 14
#> $ total_count <int> 44460, 44460, 44460, 44460, 44460, 44460, 44460, 44…
#> $ incomplete_results <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRU…
#> $ url <chr> "https://api.github.com/repos/esforexco/esforexco.g…
#> $ sha <chr> "523339a9eef676382f86caa7cb4019840089eda1", "c27ce7…
#> $ node_id <chr> "MDY6Q29tbWl0MjI1MzU4MjMwOjUyMzMzOWE5ZWVmNjc2MzgyZj…
#> $ html_url <chr> "https://github.com/esforexco/esforexco.github.io/c…
#> $ comments_url <chr> "https://api.github.com/repos/esforexco/esforexco.g…
#> $ commit <list> ["https://api.github.com/repos/esforexco/esforexco…
#> $ author <list> ["justprodev", 58386042, "MDQ6VXNlcjU4Mzg2MDQy", "…
#> $ committer <list> ["justprodev", 58386042, "MDQ6VXNlcjU4Mzg2MDQy", "…
#> $ parents <list> [["https://api.github.com/repos/esforexco/esforexc…
#> $ repository <list> [225358230, "MDEwOlJlcG9zaXRvcnkyMjUzNTgyMzA=", "e…
#> $ score <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
#> $ no <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, …