Search code examples
r, list, lapply, purrr, tibble

Get coefficients from a list of tibbles


I have a list of tibbles (too many to process manually) containing data that I need to tabulate. I want to get the p-values, but let's say I want to extract multiple columns and multiple rows from something like this:

sample$ppara
# A tibble: 9 × 8
effect   group    term            estimate std.error    df statistic   p.value
  <chr>    <chr>    <chr>              <dbl>     <dbl> <dbl>     <dbl>     <dbl>
1 fixed    NA       (Intercept)      8.23      0.572     129   14.4     1.27e-28
2 fixed    NA       edad0           -0.00300   0.00771   129   -0.389   6.98e- 1
3 fixed    NA       time            -0.00579   0.0980     93   -0.0591  9.53e- 1
4 fixed    NA       grup_int         0.104     0.116     129    0.895   3.73e- 1
5 fixed    NA       time:grup_int   -0.0453    0.0457     93   -0.990   3.25e- 1
6 ran_pars id       sd_(Intercept)   0.819    NA          NA   NA      NA       
7 ran_pars id       cor_time.(Inte… -0.827    NA          NA   NA      NA       
8 ran_pars id       sd_time          0.309    NA          NA   NA      NA       
9 ran_pars Residual sd_Observation   0.356    NA          NA   NA      NA       

Specifically, I want to retain the name of the variable (ppara) when tabulating the results.

I am looking for two approaches: base R syntax (e.g. the apply family) and/or the purrr::map functions. Here are my attempts so far:

#1
out <- map2_df(.x = sample,
               .y = names(sample),
               .f = ~ {
                 temp <- sample[[.]]$p.value
                 })

#2
out <- map2_df(.x = sample,
               .y = names(sample),
               .f = ~ {
                 temp <- sample[[.x]]$p.value
                 })



sapply(sample, function(x) x[5,8])

Desired output

Var     term         p.value
ppara   time          0.048
ppara   grup_int       0.7
pparg   time          0.03
pparg   grup_int       0.9
lrp1     time         0.048
lrp1   grup_int       0.7

The database

sample <- 
list(ppara = structure(list(effect = c("fixed", "fixed", "fixed", 
"fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", "ran_pars"
), group = c(NA, NA, NA, NA, NA, "id", "id", "id", "Residual"
), term = c("(Intercept)", "edad0", "time", "grup_int", "time:grup_int", 
"sd_(Intercept)", "cor_time.(Intercept)", "sd_time", "sd_Observation"
), estimate = c(8.23144341495959, -0.00299788328827405, -0.00578981152447268, 
0.103780244730389, -0.0452613750584519, 0.818909993116146, -0.827336729479646, 
0.30909890538813, 0.355517251103403), std.error = c(0.571643342465992, 
0.00770825512293783, 0.0979783085964459, 0.116001719438183, 0.0457364054219781, 
NA, NA, NA, NA), df = c(129, 129, 93, 129, 93, NA, NA, NA, NA
), statistic = c(14.399613891155, -0.388918534799542, -0.0590927890817121, 
0.894644021080163, -0.989613736384761, NA, NA, NA, NA), p.value = c(1.27025495304043e-28, 
0.697977615162918, 0.95300499469296, 0.372643612464504, 0.324930741925854, 
NA, NA, NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", 
"tbl", "data.frame")), ppard = structure(list(effect = c("fixed", 
"fixed", "fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", 
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id", 
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int", 
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time", 
"sd_Observation"), estimate = c(5.18359198577722, 0.00228679471444601, 
-0.135473971604774, 0.0593901933594388, -0.00917122634564429, 
0.615877528429973, -0.768824349334309, 0.260993158259707, 0.312749339804677
), std.error = c(0.43022445547674, 0.00587133152604662, 0.0733591723862023, 
0.0798423671078272, 0.0340855128294131, NA, NA, NA, NA), df = c(148, 
148, 138, 148, 138, NA, NA, NA, NA), statistic = c(12.0485758533489, 
0.389484856084391, -1.84672164636163, 0.743843093720308, -0.269065229898206, 
NA, NA, NA, NA), p.value = c(9.96588328228921e-24, 0.697477383279218, 
0.066929504853735, 0.45815070193322, 0.788281346016441, NA, NA, 
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame")), pparg = structure(list(effect = c("fixed", "fixed", 
"fixed", "fixed", "fixed", "ran_pars", "ran_pars", "ran_pars", 
"ran_pars"), group = c(NA, NA, NA, NA, NA, "id", "id", "id", 
"Residual"), term = c("(Intercept)", "edad0", "time", "grup_int", 
"time:grup_int", "sd_(Intercept)", "cor_time.(Intercept)", "sd_time", 
"sd_Observation"), estimate = c(11.7849185400239, 0.00500482392946355, 
-0.0287950497053358, 0.0531237373210473, -0.0307355823546849, 
0.850075520195338, -0.547821112109215, 0.258949971203219, 0.376723078311319
), std.error = c(0.694859635848458, 0.00979221254717485, 0.0803747844308631, 
0.10509839899698, 0.0374101925206616, NA, NA, NA, NA), df = c(148, 
148, 136, 148, 136, NA, NA, NA, NA), statistic = c(16.9601426418069, 
0.511102460792428, -0.358259744137849, 0.505466665791681, -0.821583111012051, 
NA, NA, NA, NA), p.value = c(1.61500691299714e-36, 0.610040924262626, 
0.720704418237478, 0.613983052245881, 0.41275215624961, NA, NA, 
NA, NA)), row.names = c(NA, -9L), class = c("tbl_df", "tbl", 
"data.frame")))

Solution

  • Here is another simple approach:

    f <- function(d,terms = c("time", "grup_int")) filter(d,term %in% terms) %>% select(term,p.value)
    
    map(sample, f) %>% bind_rows(.id = "Var")
    

    Output:

      Var   term     p.value
      <chr> <chr>      <dbl>
    1 ppara time      0.953 
    2 ppara grup_int  0.373 
    3 ppard time      0.0669
    4 ppard grup_int  0.458 
    5 pparg time      0.721 
    6 pparg grup_int  0.614