Search code examples
rsapplytapply

Summarising data in a list in R


I have numerous dataframes all contained in a list called 1a1 the names in the list are dates when data were collected e.g.

 names(1a1)
[1] "Jan4" "Jan5" "Jan6" "Jan7" "Jan8" "Jan9" "Jan10"

all the dataframes in the list are in the same format

e.g.

 dput(Jan4)
structure(list(Species = c("bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "bluti", "bluti", "bluti", 
"bluti", "bluti", "bluti", "bluti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti", "greti", "greti", "greti", 
"greti", "greti", "greti", "greti"), Pit.tag = c("01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD77C", "01103FD77C", "01103FD77C", "01103FD77C", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD77C", "01103FD77C", "01103FD77C", 
"01103FD77C", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", 
"01103FD6EF", "01103FD6EF", "01103FD6EF", "01103FD6EF", "01103F9F29", 
"01103F9F29", "01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", 
"01103F9F29", "01103F9F29", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", "0700EDADB8", 
"0700EDADB8", "0700EDADB8", "01103F9F29", "01103F9F29", "01103F9F29", 
"01103F9F29"), Date = c("04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021", 
"04-01-2021", "04-01-2021", "04-01-2021", "04-01-2021"), Time = c("08:01:41", 
"08:01:42", "08:01:42", "08:03:09", "08:03:09", "08:03:10", "08:02:57", 
"08:02:57", "08:02:58", "08:03:36", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:01:41", "08:01:42", "08:01:42", 
"08:03:09", "08:03:09", "08:03:10", "08:02:57", "08:02:57", "08:02:58", 
"08:03:36", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:01:41", "08:01:42", "08:01:42", "08:03:09", "08:03:09", 
"08:03:10", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:00:43", "08:00:44", "08:00:44", 
"08:01:39", "08:01:39", "08:01:40", "08:01:40", "08:02:54", "08:02:54", 
"08:02:55", "08:02:26", "08:02:26", "08:03:37", "08:03:38", "08:00:43", 
"08:00:44", "08:00:44", "08:01:39", "08:01:39", "08:01:40", "08:01:40", 
"08:02:54", "08:02:54", "08:02:55", "08:02:26", "08:02:26", "08:03:37", 
"08:03:38")), row.names = c(NA, -92L), class = "data.frame")

What I would like to do is create a new dataframe which summarises these data where I can see how many times an idividual Pit.tag was seen (create a new variable called No_of_visits) and which Species it belonged to

e.g.

Pit.tag     Species   No_of_visits
01103FD6EF   bluti    47

I can get most of the info I need by using variations of

Visitsbypit<-sapply(tapply(1a1$`Jan4`$Species, 1a1$`Jan4`$Pit.tag, length), unique)
Vistsbyspecies<-sapply(tapply(1a1$`Jan4`$Pit.tag, 1a1$`Jan4`$Species, length), unique)


Visitsbypit
01103F9776 01103FA8DD 01103FC9DE 
        10        133        255

Vistsbyspecies
greti bluti 
   10   388

Note: These data above will not match the data I've provided here they are from another dataframe

This also doesn't really get me what I'm after, so Visitsbyspecies is just the total visits by each species and not linked to the Pit.tag record. I would like to link the Pit.tag records in Visitsbypit to their corresponding species

But I can't make the next leap and put this info together. I'm also thinking that there must be a better way for this to be done across the list rather than having to specify for e.g. 1a1$`Jan4`$Species that seems to me to defeat the purpose of having the dataframes in a list and using sapply and tapply


Solution

  • Do you want this?

    library(dplyr)
    
    Jan4 %>% count(Species, Pit.tag)
    
    #  Species    Pit.tag  n
    #1   bluti 01103FD6EF 42
    #2   bluti 01103FD77C  8
    #3   greti 01103F9F29 12
    #4   greti 0700EDADB8 30
    

    To apply to list of dataframes use lapply/purrr::map -

    lapply(`1a1`, function(x) x %>% count(Species, Pit.tag))