Search code examples
r dataframe function formula

Formula to calculate FPKM (fragments per kilobase of exon per million mapped fragments) in R #R


I have a data frame. I want to calculate FPKM by applying the following formula to columns 5, 6, 7 and 8:

fpkm = (X * 10^9) / (colSums(X) * length) # i.e. (1691 * 10^9)/(12660 * 1161) for row1


chr     start       end strand   bam_RC1 bam_RC2 bam_RC3 bam_RC4   length
chr8  85554228  85555389      - 1691           137            45           107 1161
chr11  60877576  60879894      - 2410           235            72           161 2318
chr12 108793341 108795193      - 3334           143            64           164 1852
chr2  92432803  92434088      - 1043            60            28            66 1285
chr13 100124401 100125923      - 1479           114            62           129 1522
chr7 109519172 109521109      - 2703           123            36            95 1937

Can anyone please tell me how to create such a function and write the values to separate columns? Thanks.


Solution

  • We may use across in mutate

    library(dplyr)
    # FPKM = count * 10^9 / (column total of the counts * feature length).
    # Columns 5 to 8 hold the counts; `length` inside the lambda is the `length`
    # column of df1, and each result is stored in a new column "fpkm_<column>".
    # as.numeric() makes the column total a double: with integer counts,
    # sum(.x) * length is integer arithmetic and becomes NA (with a warning)
    # as soon as the product exceeds .Machine$integer.max.
    df1 <- df1 %>%
       mutate(across(5:8,
                     ~ (.x * 10^9) / (sum(as.numeric(.x), na.rm = TRUE) * length),
                     .names = "fpkm_{.col}"))
    

    -output

    df1
        chr     start       end strand bam_RC1 bam_RC2 bam_RC3 bam_RC4 length fpkm_bam_RC1 fpkm_bam_RC2 fpkm_bam_RC3 fpkm_bam_RC4
    1  chr8  85554228  85555389      -    1691     137      45     107   1161    115047.63    145322.32    126253.06    127648.10
    2 chr11  60877576  60879894      -    2410     235      72     161   2318     82123.96    124852.83    101176.74     96200.04
    3 chr12 108793341 108795193      -    3334     143      64     164   1852    142197.16     95090.91    112564.28    122649.47
    4  chr2  92432803  92434088      -    1043      60      28      66   1285     64113.20     57503.21     70976.82     71138.32
    5 chr13 100124401 100125923      -    1479     114      62     129   1522     76757.32     92243.16    132690.14    117391.83
    6  chr7 109519172 109521109      -    2703     123      36      95   1937    110225.66     78202.29     60538.90     67929.24
    

    or using base R

    # Base R FPKM: for each count column (columns 5 to 8), divide count * 10^9 by
    # (that column's total * the per-row `length`), and store the result in a new
    # column named "fpkm_<column>".
    df1[paste0("fpkm_", names(df1)[5:8])] <- lapply(
      df1[5:8],
      function(counts) {
        # Sum in double precision so the product with `length` stays a double.
        (counts * 10^9) / (sum(as.numeric(counts), na.rm = TRUE) * df1$length)
      }
    )
    

    data

    # Example input: six intervals (chr, start, end, strand), four count columns
    # (bam_RC1 to bam_RC4) and `length`, which equals end - start in every row.
    df1 <- data.frame(
      chr = c("chr8", "chr11", "chr12", "chr2", "chr13", "chr7"),
      start = c(85554228L, 60877576L, 108793341L, 92432803L, 100124401L,
                109519172L),
      end = c(85555389L, 60879894L, 108795193L, 92434088L, 100125923L,
              109521109L),
      strand = rep("-", 6L),
      bam_RC1 = c(1691L, 2410L, 3334L, 1043L, 1479L, 2703L),
      bam_RC2 = c(137L, 235L, 143L, 60L, 114L, 123L),
      bam_RC3 = c(45L, 72L, 64L, 28L, 62L, 36L),
      bam_RC4 = c(107L, 161L, 164L, 66L, 129L, 95L),
      length = c(1161L, 2318L, 1852L, 1285L, 1522L, 1937L),
      stringsAsFactors = FALSE
    )