I have a data frame. I want to calculate FPKM by applying the following formula to columns 5, 6, 7 and 8:
fpkm = (X * 10^9) / (colSums(X) * length) # i.e. (1691 * 10^9)/(12660 * 1161) for row1
chr start end strand bam_RC1 bam_RC2 bam_RC3 bam_RC4 length
chr8 85554228 85555389 - 1691 137 45 107 1161
chr11 60877576 60879894 - 2410 235 72 161 2318
chr12 108793341 108795193 - 3334 143 64 164 1852
chr2 92432803 92434088 - 1043 60 28 66 1285
chr13 100124401 100125923 - 1479 114 62 129 1522
chr7 109519172 109521109 - 2703 123 36 95 1937
Can anyone please tell me how to create such a function and write the values into separate columns? Thanks.
We may use `across()` inside `mutate()`:
library(dplyr)

# Compute FPKM for the count columns at positions 5 to 8 and store each result
# in a new column named fpkm_<source column>:
#   fpkm = count * 1e9 / (column total * length)
# The column total is computed as a double. With integer count and `length`
# columns, `sum(.x) * length` would be integer arithmetic, which yields NA
# (with a warning) once a value exceeds .Machine$integer.max (about 2.1e9).
df1 <- df1 %>%
  mutate(
    across(
      5:8,
      ~ (.x * 10^9) / (sum(as.numeric(.x), na.rm = TRUE) * length),
      .names = "fpkm_{.col}"
    )
  )
-output
df1
chr start end strand bam_RC1 bam_RC2 bam_RC3 bam_RC4 length fpkm_bam_RC1 fpkm_bam_RC2 fpkm_bam_RC3 fpkm_bam_RC4
1 chr8 85554228 85555389 - 1691 137 45 107 1161 115047.63 145322.32 126253.06 127648.10
2 chr11 60877576 60879894 - 2410 235 72 161 2318 82123.96 124852.83 101176.74 96200.04
3 chr12 108793341 108795193 - 3334 143 64 164 1852 142197.16 95090.91 112564.28 122649.47
4 chr2 92432803 92434088 - 1043 60 28 66 1285 64113.20 57503.21 70976.82 71138.32
5 chr13 100124401 100125923 - 1479 114 62 129 1522 76757.32 92243.16 132690.14 117391.83
6 chr7 109519172 109521109 - 2703 123 36 95 1937 110225.66 78202.29 60538.90 67929.24
or using base R
# Base R version: for each count column (positions 5 to 8) pair the counts
# with that column's total, apply count * 1e9 / (total * length), and store
# the results in new columns named fpkm_<source column>.
df1[paste0("fpkm_", names(df1)[5:8])] <- Map(
  function(counts, total) (counts * 10^9) / (total * df1$length),
  df1[5:8],
  colSums(df1[5:8], na.rm = TRUE)
)
# Reproducible example input: six rows with genomic coordinates, four integer
# count columns (bam_RC1 to bam_RC4) and an integer `length` column.
df1 <- data.frame(
  chr = c("chr8", "chr11", "chr12", "chr2", "chr13", "chr7"),
  start = c(
    85554228L, 60877576L, 108793341L, 92432803L, 100124401L, 109519172L
  ),
  end = c(
    85555389L, 60879894L, 108795193L, 92434088L, 100125923L, 109521109L
  ),
  strand = c("-", "-", "-", "-", "-", "-"),
  bam_RC1 = c(1691L, 2410L, 3334L, 1043L, 1479L, 2703L),
  bam_RC2 = c(137L, 235L, 143L, 60L, 114L, 123L),
  bam_RC3 = c(45L, 72L, 64L, 28L, 62L, 36L),
  bam_RC4 = c(107L, 161L, 164L, 66L, 129L, 95L),
  length = c(1161L, 2318L, 1852L, 1285L, 1522L, 1937L),
  stringsAsFactors = FALSE
)