Estimate_richness for all phyla in phyloseq

Is there an easy way to get ASV richness for each Phylum for each Station using the estimate_richness function in phyloseq? Or is there another simple way of extracting the abundance data for each taxonomic rank and calculating richness that way?

So far I have just been subsetting individual Phyla of interest using for example:

ps.Prymnesiophyceae <- subset_taxa(ps, Phylum == "Prymnesiophyceae")  
alpha_diversity<-estimate_richness(ps.Prymnesiophyceae,measure=c("Shannon","Observed"))  
H<-alpha_diversity$Shannon  
S1<-alpha_diversity$Observed  
S<-log(S1)  
evenness<-H/S
alpha<-cbind(Shannon=H,Richness=S1,Evenness=evenness,sample_data(Prymnesiophyceae))

But this is rather a pain when having to do it for e.g. the top 20 phyla.

EDIT: suggestion by @GTM works well until last step. See comment + dput:

> dput(head(sample_names(ps.transect), n=2)) c("2-1-DCM_S21_L001_R1_001.fastq", "2-1-SA_S9_L001_R1_001.fastq" )
> dput(head(alpha, n=2)) structure(list(Observed = c(31, 25), Shannon = c(2.84184012598765, 
2.53358345702604), taxon = c("Prymnesiophyceae", "Prymnesiophyceae" ), sample_id = c("X2.1.DCM_S21_L001_R1_001.fastq", "X2.1.SA_S9_L001_R1_001.fastq" ), S = c(3.43398720448515,
3.2188758248682), evenness = c(0.827562817437384, 
0.787101955736294)), row.names = c("X2.1.DCM_S21_L001_R1_001.fastq",  "X2.1.SA_S9_L001_R1_001.fastq"), class = "data.frame")
> dput(head(smpl_data, n=1)) new("sample_data", .Data = list("001_DCM", 125L, structure(1L, .Label = "DCM", class = "factor"), structure(1L, .Label = "Transect", class = "factor"), structure(1L, .Label = "STZ", class = "factor"), 
    structure(1L, .Label = "STFW", class = "factor"), "Oligotrophic", 
    16L, -149.9978333, -29.997, 130.634, 17.1252, 35.4443, 1025.835008, 
    1.1968, 1e-12, 5.387, 2.8469, 52.26978546, 98.0505, 0, 0, 
    0.02, 0.9, 0, 0, 2069.47, 8.057, 377.3), names = c("Station_neat",  "Depth_our", "Depth_bin", "Loc", "Front", "Water", "Zone", "Bottle",  "Lon", "Lat", "pressure..db.", "Temperature", "Salinity", "Density_kgm.3",  "Fluorescence_ugL", "PAR", "BottleO2_mLL", "CTDO2._mLL", "OxygenSat_.",  "Beam_Transmission", "N_umolL", "NO3_umolL", "PO4_umolL", "SIL_umolL",  "NO2_umolL", "NH4_umolL", "DIC_uMkg", "pH", "pCO2_matm"), row.names = "2-1-DCM_S21_L001_R1_001.fastq", 
    .S3Class = "data.frame")

Solution

You can wrap your code in a for loop to do so. I've slightly modified your code to make it a bit more flexible, see below.

require("phyloseq")
require("dplyr")

# Calculate alpha diversity measures for a specific taxon at a specified rank.
# You can pass any parameters that you normally pass to `estimate_richness`
estimate_diversity_for_taxon <- function(ps, taxon_name, tax_rank = "Phylum", ...){
  
  # Subset to taxon of interest
  tax_tbl <- as.data.frame(tax_table(ps))
  keep <- tax_tbl[,tax_rank] == taxon_name
  keep[is.na(keep)] <- FALSE
  ps_phylum <- prune_taxa(keep, ps)
  
  # Calculate alpha diversity and generate a table
  alpha_diversity <- estimate_richness(ps_phylum, ...)
  alpha_diversity$taxon <- taxon_name
  alpha_diversity$sample_id <- row.names(alpha_diversity)
  return(alpha_diversity)
}

# Load data
data(GlobalPatterns)
ps <- GlobalPatterns

# Estimate alpha diversity for each phylum
phyla <- get_taxa_unique(ps, 
                         taxonomic.rank = 'Phylum')
phyla <- phyla[!is.na(phyla)]
alpha <- data.frame()
for (phylum in phyla){
  a <- estimate_diversity_for_taxon(ps = ps, 
                                    taxon_name = phylum,
                                    measure = c("Shannon", "Observed"))
  alpha <- rbind(alpha, a)
}

# Calculate the additional alpha diversity measures
alpha$S <- log(alpha$Observed)
alpha$evenness <- alpha$Shannon/alpha$S

# Add sample data
smpl_data <- as.data.frame(sample_data(ps))
alpha <- left_join(alpha, 
                   smpl_data, 
                   by = c("sample_id" = "X.SampleID"))

This is a reproducible example with GlobalPatterns. Make sure to alter the code to match your data by replacing X.SampleID in the left join with the name of the column that contains the sample IDs in your sample_data. If there is no such column, you can create it from the row names:

smpl_data <- as.data.frame(sample_data(ps))
smpl_data$sample_id < row.names(smpl_data)
alpha <- left_join(alpha, 
                   smpl_data, 
                   by = c("sample_id" = "sample_id"))