Search code examples
r loops lapply biomart

Looping with biomart in R


I have a list of datasets that I created from many files.


# Build the example input: a list named sample1..sample4 whose elements are
# identical data frames with an `ensembl.id` column and a `counts` column.
list.function <- function() {
  gene_ids <- c(
    "ENSG00000000005.6", "ENSG00000000003.15", "ENSG00000000419.13",
    "ENSG00000000457.14", "ENSG00000000460.17"
  )
  read_counts <- c(4, 5, 6, 1, 1)
  sample_names <- paste0("sample", 1:4)

  # One freshly constructed data frame per sample name; all four carry the
  # same toy values.
  samples <- lapply(sample_names, function(nm) {
    data.frame(ensembl.id = gene_ids, counts = read_counts)
  })
  names(samples) <- sample_names
  samples
}

# Build the example list of data frames and display it (top-level auto-print).
my.list3 <- list.function()
my.list3



# Load biomaRt and create the mart object passed to getBM() below:
# the "ensembl" mart with the "hsapiens_gene_ensembl" dataset selected.
library("biomaRt")
grch38     <- useMart("ensembl",dataset="hsapiens_gene_ensembl")

I'm trying to automate this operation:


# For each data frame `x` in my.list3: fetch the listed attributes with getBM()
# (filtering on ensembl_gene_id_version with the values in x$ensembl.id),
# keep one annotation row per ensembl_gene_id_version, merge the annotation
# onto `x`, and convert gene_biotype to a factor.
# NOTE(review): `%>%` and distinct() are used below but only biomaRt is loaded
# in the visible code -- presumably dplyr/tidyverse is attached elsewhere; confirm.
my.list4 = lapply(my.list3, function(x){
  
atributos = getBM(attributes = c("ensembl_gene_id_version", "external_gene_name",  "chromosome_name", "gene_biotype", "entrezgene_description"),
                  filters = "ensembl_gene_id_version",
                  values = x$ensembl.id,
                  mart = grch38)


# Drop repeated rows for the same ensembl_gene_id_version, keeping all columns.
atributos_unique = atributos %>% distinct(ensembl_gene_id_version, .keep_all = TRUE)


# Join on the sample's ensembl.id column and the annotation's ensembl_gene_id_version column.
merged = merge(x, atributos_unique, by.x="ensembl.id", by.y="ensembl_gene_id_version" )


# NOTE: this assignment is the last expression of the function, so its value
# (the factor) is what lapply() collects for each element -- not the `merged`
# data frame itself.
merged$gene_biotype = as.factor(merged$gene_biotype)
})

This runs over every dataset in the list, but the output is not what I expect!

I need the final "merged" output to be produced separately for each dataset in my "my.list3" list, with the same name as the original dataset.

Any ideas?


Solution

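  • The anonymous function never returns the data frame: its last expression is the assignment to merged$gene_biotype, so lapply() collects that factor instead of merged.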

    library(biomaRt)
    library(tidyverse)
    
    # Create the mart object: the "ensembl" mart with the
    # "hsapiens_gene_ensembl" dataset selected.
    grch38 <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")

    # Annotate every data frame in my.list3. lapply() carries the element
    # names of my.list3 over to the result.
    my.list4 <- lapply(my.list3, function(x) {
      # Attributes requested for the IDs present in this sample.
      wanted <- c(
        "ensembl_gene_id_version",
        "external_gene_name",
        "chromosome_name",
        "gene_biotype",
        "entrezgene_description"
      )
      annotation <- getBM(
        attributes = wanted,
        filters = "ensembl_gene_id_version",
        values = x$ensembl.id,
        mart = grch38
      )

      # One annotation row per ensembl_gene_id_version, all columns kept.
      annotation_unique <- distinct(
        annotation, ensembl_gene_id_version, .keep_all = TRUE
      )

      merged <- merge(
        x, annotation_unique,
        by.x = "ensembl.id", by.y = "ensembl_gene_id_version"
      )
      merged$gene_biotype <- as.factor(merged$gene_biotype)

      # The data frame is the last expression, so it is what lapply() collects.
      merged
    })
    

    Add return(merged) (or simply merged) as the last expression of the function passed to lapply().