Search code examples
rrecursionlinked-listtree

Recursive function that traces connected documents in a tibble does not stop


I wrote a recursive function get_tree that traces linked documents in a tibble named data (they are linked by the columns id_from and it_to).

However, the function will not stop running. It will repeat certain linked documents over and over again.

#data
data <- structure(list(id_from = c("K202527", "K190593", "K181502", "K163512", 
                                   "K163512", "K133701", "K143513", "K143513", "K143513", "K113372", 
                                   "K142681", "K091075", "K091075", "K111917", "K111917", "K042463", 
                                   "K042463", "K042463", "K042463", "K062282", "K062282", "K062282", 
                                   "K072558", "K072558", "K091971", "K091971", "K091971", "K091971", 
                                   "K012241", "K012241", "K013717", "K043380", "K043380", "K062282", 
                                   "K062282", "K062282", "K063222", "K063222", "K071130", "K072558", 
                                   "K072558", "K083287", "K083287", "K083287", "K090696", "K090696", 
                                   "K090696", "K090696", "K012241", "K012241", "K013717", "K043380", 
                                   "K043380", "K052240", "K063222", "K063222", "K071130", "K072558", 
                                   "K072558", "K081365", "K083287", "K083287", "K083287", "K982803", 
                                   "K982803", "K982803", "K012241", "K012241", "K013717", "K052240", 
                                   "K063222", "K063222", "K072558", "K072558", "K982803", "K982803", 
                                   "K982803", "K012241", "K012241", "K052240", "K063222", "K063222", 
                                   "K982803", "K982803", "K982803", "K012241", "K012241", "K052240", 
                                   "K982803", "K982803", "K982803", "K012241", "K012241", "K982803", 
                                   "K982803", "K982803", "K982803", "K982803", "K982803"), id_to = c("K190593", 
                                                                                                     "K181502", "K163512", "K133701", "K143513", "K113372", "K121319", 
                                                                                                     "K121628", "K142681", "K111917", "K091075", "K042463", "K062282", 
                                                                                                     "K072558", "K091971", "K930564", "K012686", "K881905", "K002544", 
                                                                                                     "K003621", "K013717", "K043380", "K012241", "K063222", "K071130", 
                                                                                                     "K072558", "K083287", "K090696", "K003707", "K982803", "K003621", 
                                                                                                     "K003621", "K013717", "K003621", "K013717", "K043380", "K012241", 
                                                                                                     "K052240", "K033451", "K012241", "K063222", "K072558", "K043604", 
                                                                                                     "K042113", "K083287", "K042022", "K081365", "K063222", "K003707", 
                                                                                                     "K982803", "K003621", "K003621", "K013717", "K012241", "K012241", 
                                                                                                     "K052240", "K033451", "K012241", "K063222", "K994331", "K072558", 
                                                                                                     "K043604", "K042113", "K843503", "K880626", "K934913", "K003707", 
                                                                                                     "K982803", "K003621", "K012241", "K012241", "K052240", "K012241", 
                                                                                                     "K063222", "K843503", "K880626", "K934913", "K003707", "K982803", 
                                                                                                     "K012241", "K012241", "K052240", "K843503", "K880626", "K934913", 
                                                                                                     "K003707", "K982803", "K012241", "K843503", "K880626", "K934913", 
                                                                                                     "K003707", "K982803", "K843503", "K880626", "K934913", "K843503", 
                                                                                                     "K880626", "K934913"), rec = c(1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 
                                                                                                                                    6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 
                                                                                                                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 
                                                                                                                                    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 
                                                                                                                                    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 
                                                                                                                                    12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15
                                                                                                     )), row.names = c(NA, -99L), class = c("tbl_df", "tbl", "data.frame"
                                                                                                     ))

Example data:

head(data)
#> # A tibble: 6 × 3
#>   id_from id_to     rec
#>   <chr>   <chr>   <dbl>
#> 1 K202527 K190593     1
#> 2 K190593 K181502     2
#> 3 K181502 K163512     3
#> 4 K163512 K133701     4
#> 5 K163512 K143513     4
#> 6 K133701 K113372     5

My funciton

# iterative function
get_tree <- function(id, data) {
  cat("Processing node: ", id, "\n")
  branch <- filter(data, id_from == id) %>% distinct()
  if (nrow(branch) == 0) return()
  bind_rows(
    branch, 
    map(branch$id_to, \(x) get_tree(x, data))
  ) %>% distinct()
}

Function applied

# function applied
library(tidyverse)
library(igraph)
library(R.utils)


withTimeout( #beacuse it will not stop running
{data %>% 
  get_tree("K202527",.)},timeout=1,onTimeout = "warning")
#> Processing node:  K202527 
#> Processing node:  K190593 
#> Processing node:  K181502 
#> Processing node:  K163512 
#> Processing node:  K133701 
#> Processing node:  K113372 
#> Processing node:  K111917 
#> Processing node:  K072558 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K063222 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K052240 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K012241 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K003707 
#> Processing node:  K982803 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 
#> Processing node:  K880626 
#> Processing node:  K934913 
#> Processing node:  K843503 



My approach:

I thought this could be caused by a cyclic document structure, however, I checked for that and could not find any cyclic connection.


# Create a graph object from the data frame
g <- graph_from_data_frame(data, directed = TRUE)

# Plot the graph
plot(g)


#check that it is not cyclic
is_dag(g)
#> [1] TRUE

I also manually checked the data and the nodes (K843503, K880626, K934913) that are processed over and over, I could not find a reason that would cause the function to run forever.


Solution

  • The code actually work. It just take longer to run.

    When you add more lower level nodes, it will take exponentially more time to run. Try data[1:50,] %>% get_tree("K202527",.) vs data[1:80,] %>% get_tree("K202527",.).

    Another issue is some path occurs multiple times. Therefore, same recursive actions are performed multiple times. Example: id_from="K982803" and id_to="K843503" apprears 6 times. The solution for this is to avoid running get_tree if the node is already processed.

    get_tree <- function(id, data) {
    
      cat("Processing node: ", id, "\n")
      
      if(id %in% processed_id){
        cat("already processed. Skipping this node.")
        return()
      }
      processed_id <<- c(processed_id,id)
      
      branch <- filter(data, id_from == id) %>% distinct()
      if (nrow(branch) == 0) return()
      bind_rows(
        branch, 
        map(branch$id_to, function(x) get_tree(x, data))
      ) %>% distinct()
    }
    

    and run the following

    processed_id <- c() #you need to reset this everytime
    data %>% 
      get_tree("K202527",.)
    

    This should finish almost instantly.