Search code examples
r, correlation, r-caret

Column-wise Pearson Correlation between two dataframes


I want to find the column-wise Pearson correlation between two data frames, `mrna.norm` and `protein`. My attempt below fails with an error:

# cor() correlates the columns of its two arguments, so after t() this
# correlates the rows of mrna.norm with the rows of protein. diag() then keeps
# only the i-th-with-i-th pairs and returns them as a plain numeric vector.
res <- diag(cor(t(mrna.norm),t(protein), method="pearson"))
# NOTE(review): res is a numeric vector here, not a correlation matrix.
# findCorrelation() (caret) presumably expects a correlation matrix, which
# would explain the "argument is of length zero" error reported below --
# confirm against the caret documentation.
sig.cor <- findCorrelation(res, cutoff=0.75)

Traceback:

Error in if (exact) findCorrelation_exact(x = x, cutoff = cutoff, verbose = verbose) else findCorrelation_fast(x = x,  : 
  argument is of length zero

Input:

> dput(mrna.norm[1:10,1:10])
structure(list(TCGA.2K.A9WE.01 = c(7.65342121905285, 6.35598354101006, 
14.3511850042327, 10.3737643425674, 10.0819596419255, 9.44832324553208, 
5.36085937172008, 9.78880184184623, 10.3776687573505, 11.16757118884
), TCGA.2Z.A9J1.01 = c(5.09389393824392, 6.93597002271109, 12.4136523086721, 
11.1918237390263, 10.1912122382252, 9.9623840324273, 6.22565668754941, 
10.3398477765017, 10.3103072842012, 11.1287210937383), TCGA.2Z.A9J3.01 = c(4.70168212029528, 
7.54694769203808, 10.1689338100564, 9.96839262629172, 9.87305770150294, 
9.75535162798678, 6.170389794965, 10.238532641469, 9.94050095178643, 
11.0690397931313), TCGA.2Z.A9J6.01 = c(5.13719199914349, 6.92859654071157, 
12.0367193976262, 10.8202555636581, 10.3262700402849, 9.91216810777653, 
5.52284042813955, 10.0653680664815, 10.5954686012028, 11.2355920880251
), TCGA.2Z.A9J7.01 = c(6.95117512427229, 7.25014205824679, 10.9656928148969, 
10.5991523452113, 10.4556415168452, 9.46537845450025, 6.72337288854289, 
10.0139441477751, 9.28408724134641, 11.4833270722276), TCGA.2Z.A9J8.01 = c(3.61712213221935, 
7.92111077839189, 11.9975977242282, 9.91379851626213, 9.95999222715916, 
9.90779350021794, 7.85831302551685, 10.3997246047534, 11.7402171909708, 
11.7246448152361), TCGA.2Z.A9JI.01 = c(8.20162111992077, 5.73826794012289, 
12.7954861179578, 10.0094938620897, 9.93400880935778, 9.75330735360058, 
7.0082672553098, 9.74081003982032, 10.7382235152475, 11.6720072357516
), TCGA.2Z.A9JJ.01 = c(6.26475409489153, 6.39123499553712, 12.8198023802381, 
11.7916439373724, 9.53606689182685, 10.3591574288036, 5.71406413888836, 
10.0630462305102, 10.2195932783632, 11.455724780085), TCGA.2Z.A9JQ.01 = c(6.07015744755466, 
7.61299452031897, 13.1209957412573, 10.9576837919022, 9.8945869439465, 
9.23740931858995, 4.16326524153571, 10.523017510386, 10.532865308348, 
10.619622367405), TCGA.4A.A93W.01 = c(5.59521719360418, 6.49378341273855, 
13.9452588877727, 11.136365018286, 10.0338699199786, 9.01178744914112, 
7.04236745871339, 9.77739489226773, 10.1944306108943, 11.0829417734315
)), row.names = c("A1BG", "A2LD1", "A2M", "A4GALT", "AAAS", "AACS", 
"AADAT", "AAGAB", "AAK1", "AAMP"), class = "data.frame")

> dput(protein[1:10,1:10])
structure(list(TCGA.2K.A9WE.01 = c(0.188926557, 0.32718108, -0.194691117, 
0.190334105, -0.4208597615, -0.5700936425, 0.1030672605, -0.1680609095, 
-0.7569390145, -0.519282528), TCGA.2Z.A9J1.01 = c(0.12354026575, 
0.10028027475, -0.20595222225, -0.26450453025, -0.34263506075, 
0.0128347092499997, 0.00571045124999991, -0.86616581675, 0.70064564225, 
0.56620914675), TCGA.2Z.A9J3.01 = c(-0.196046266, -0.00864849600000012, 
0.481444919, 0.048551157, 0.3332741995, 0.0318001934999999, -0.2472935905, 
0.0795903354999999, 0.9921437455, 1.048471904), TCGA.2Z.A9J6.01 = c(0.209220311, 
0.355386269, 0.188494459, -0.089151463, -0.3560846405, -0.6831252785, 
0.0195664705, -0.1564588715, 0.5518990085, 0.263465278), TCGA.2Z.A9J7.01 = c(-0.0607107314999999, 
-0.0887603205, -0.2174384005, 0.4300315565, 0.248662399, 0.618513086, 
-0.0575771639999999, 0.47129971, 0.424678434, 1.0955476295), 
    TCGA.2Z.A9J8.01 = c(0.00102636749999996, 0.1892364665, -0.1960149265, 
    0.1247237835, -0.222737563, 0.548415673, 0.041269493, -0.040282716, 
    -0.26123292, 0.3184646545), TCGA.2Z.A9JI.01 = c(0.30400839175, 
    -0.22464360025, 1.33801850575, 1.20449140775, -0.21819965775, 
    0.000976520249999946, -0.09049228975, -1.04011744075, -0.09890175575, 
    1.89681005675), TCGA.2Z.A9JJ.01 = c(-0.2692214425, -0.0864145745000001, 
    0.0230486505000001, 0.6014778945, 0.08944319, 0.845252946, 
    0.156608958, 0.346152042, -0.083945674, -0.4120688775), TCGA.2Z.A9JQ.01 = c(-0.3935089475, 
    -0.0424516914999999, 0.0117823055, 0.0613523355000001, -0.0212341929999998, 
    0.36580148, -0.0890735009999998, 0.193010937, 0.479776154, 
    1.1254847975), TCGA.4A.A93W.01 = c(0.1850438315, 0.2804649655, 
    0.3236980115, -0.9023580375, 0.408590392, 0.551551224, -0.0721507979999999, 
    -0.647440507, -0.158645141, -0.1104136205)), row.names = c("14-3-3_beta", 
"14-3-3_epsilon", "14-3-3_zeta", "4E-BP1", "4E-BP1_pS65", "4E-BP1_pT37_T46", 
"4E-BP1_pT70", "53BP1", "A-Raf_pS299", "ACC1"), class = "data.frame")

Solution

  • This is a case for `Map`. It walks the two data frames in parallel and passes their columns, one pair at a time, to the function it calls (here `cor`, whose default method is Pearson).

    # Map() steps through both data frames in parallel, column by column, and
    # calls cor() on each pair of columns. Columns are paired by position, so
    # both data frames must list their columns in the same order.
    # The result is a list holding one correlation per column pair, named
    # after the columns of the first data frame; head() shows the first six.
    Map(cor, mrna.norm, protein) |> head()
    #> $TCGA.2K.A9WE.01
    #> [1] -0.5079263
    #> 
    #> $TCGA.2Z.A9J1.01
    #> [1] -0.1388509
    #> 
    #> $TCGA.2Z.A9J3.01
    #> [1] 0.681517
    #> 
    #> $TCGA.2Z.A9J6.01
    #> [1] -0.1329392
    #> 
    #> $TCGA.2Z.A9J7.01
    #> [1] 0.5856984
    #> 
    #> $TCGA.2Z.A9J8.01
    #> [1] -0.1040772
    
    # Same calculation as above, but list2DF() converts the list of
    # correlations into a one-row data frame with one column per column pair.
    Map(cor, mrna.norm, protein) |> list2DF()
    #>   TCGA.2K.A9WE.01 TCGA.2Z.A9J1.01 TCGA.2Z.A9J3.01 TCGA.2Z.A9J6.01
    #> 1      -0.5079263      -0.1388509        0.681517      -0.1329392
    #>   TCGA.2Z.A9J7.01 TCGA.2Z.A9J8.01 TCGA.2Z.A9JI.01 TCGA.2Z.A9JJ.01
    #> 1       0.5856984      -0.1040772       0.5429705       0.2429322
    #>   TCGA.2Z.A9JQ.01 TCGA.4A.A93W.01
    #> 1       0.4349538      -0.1856298
    

    Created on 2023-07-16 with reprex v2.0.2