I just learned how to add branches to linked documents (document trees).
Now I am trying to do the opposite, i.e., cut branches of document trees according to a lookup list by using a function.
Reproducible example:
library(tidyverse)
# list of document trees
df1 <- tibble(id_from=c(NA_character_,"111","222","333","444","444","aaa","bbb","x","x"),
id_to=c("111","222","333","444","aaa","bbb","x","ccc","x1","x1"),
level=c(0,1,2,3,4,4,5,5,6,6))
df2 <- tibble(id_from=c(NA_character_,"thank"),
id_to=c("thank","you"),
level=c(0,1))
tree_list <- list(df1,df2)
tree_list
#> [[1]]
#> # A tibble: 10 × 3
#> id_from id_to level
#> <chr> <chr> <dbl>
#> 1 <NA> 111 0
#> 2 111 222 1
#> 3 222 333 2
#> 4 333 444 3
#> 5 444 aaa 4
#> 6 444 bbb 4
#> 7 aaa x 5
#> 8 bbb ccc 5
#> 9 x x1 6
#> 10 x x1 6
#>
#> [[2]]
#> # A tibble: 2 × 3
#> id_from id_to level
#> <chr> <chr> <dbl>
#> 1 <NA> thank 0
#> 2 thank you 1
# lookup list, i.e. branches that I want to cut
cut1 <- tibble(id_from=c("444"),
id_to=c("aaa"))
cut2 <- tibble(id_from=c("thank"),
id_to=c("you"))
cut3 <- tibble(id_from=c("bbb"),
id_to=c("ccc"))
cut4 <- tibble(id_from=c("x"),
id_to=c("x1"))
cut_lookup <- list(cut1,cut2,cut3,cut4)
cut_lookup
#> [[1]]
#> # A tibble: 1 × 2
#> id_from id_to
#> <chr> <chr>
#> 1 444 aaa
#>
#> [[2]]
#> # A tibble: 1 × 2
#> id_from id_to
#> <chr> <chr>
#> 1 thank you
#>
#> [[3]]
#> # A tibble: 1 × 2
#> id_from to_id
#> <chr> <chr>
#> 1 bbb ccc
#>
#> [[4]]
#> # A tibble: 1 × 2
#> id_from id_to
#> <chr> <chr>
#> 1 x x1
Created on 2023-04-02 with reprex v2.0.2
Desired output:
#> [[1]]
#> # A tibble: 5 × 3
#> id_from id_to level
#> <chr> <chr> <dbl>
#> 1 <NA> 111 0
#> 2 111 222 1
#> 3 222 333 2
#> 4 333 444 3
#> 5 444 bbb 4
#>
#> [[2]]
#> # A tibble: 1 × 3
#> id_from id_to level
#> <chr> <chr> <dbl>
#> 1 <NA> thank 0
I tried the following, but I get errors:
# function to cut branches from a single tree
cut_tree <- function(tree, cuts) {
nodes_to_cut_table <- setNames(rep(TRUE, length(cuts$id_from)), cuts$id_from)
nodes_to_cut <- unique(cuts$id_from)
tree %>%
filter(!id_from %in% nodes_to_cut) %>%
filter(!id_to %in% nodes_to_cut) %>%
filter(!id_from %in% nodes_to_cut_table) %>%
filter(!id_to %in% nodes_to_cut_table)
}
# function to apply cuts to a list of trees
cut_trees <- function(tree_list, cut_lookup) {
pmap(list(tree_list, cut_lookup), cut_tree)
}
# apply cuts to the example input
cut_trees <- cut_trees(tree_list, cut_lookup)
#> Error in `pmap()`:
#> ! Can't recycle `.l[[1]]` (size 2) to match `.l[[2]]` (size 4).
#> Backtrace:
#> ▆
#> 1. ├─global cut_trees(tree_list, cut_lookup)
#> 2. │ └─purrr::pmap(list(tree_list, cut_lookup), cut_tree)
#> 3. │ └─purrr:::pmap_("list", .l, .f, ..., .progress = .progress)
#> 4. │ └─vctrs::vec_size_common(!!!.l, .arg = ".l", .call = .purrr_error_call)
#> 5. └─vctrs::stop_incompatible_size(...)
#> 6. └─vctrs:::stop_incompatible(...)
#> 7. └─vctrs:::stop_vctrs(...)
#> 8. └─rlang::abort(message, class = c(class, "vctrs_error"), ..., call = call)
cut_trees
#> function(tree_list, cut_lookup) {
#> pmap(list(tree_list, cut_lookup), cut_tree)
#> }
Created on 2023-04-02 with reprex v2.0.2
UPDATE As discussed here; the items can merge. The items are temporally ordered (newest first), items only reference older items, they never reference a more recent item.
This solution uses the same get_tree()
function from my answer to your previous post. Now it’s iterated over by cut_branches()
to find branches in the data corresponding to the nodes in cut_lookup()
. The identified branches are then removed using dplyr::anti_join()
.
library(dplyr)
library(purrr)
get_tree <- function(id, data) {
branch <- filter(data, id_from == id)
if (nrow(branch) == 0) return()
bind_rows(
branch,
map(branch$id_to, \(x) get_tree(x, data))
)
}
cut_branches <- function(data, lookups) {
nodes_to_cut <- bind_rows(lookups)
branches_to_cut <- nodes_to_cut %>%
pull(id_to) %>%
map(\(id) get_tree(id, data)) %>%
bind_rows(nodes_to_cut)
anti_join(data, branches_to_cut, join_by(id_from, id_to))
}
map(tree_list, \(x) cut_branches(x, lookups = cut_lookup))
Result:
[[1]]
# A tibble: 5 × 3
id_from id_to level
<chr> <chr> <dbl>
1 <NA> 111 0
2 111 222 1
3 222 333 2
4 333 444 3
5 444 bbb 4
[[2]]
# A tibble: 1 × 3
id_from id_to level
<chr> <chr> <dbl>
1 <NA> thank 0