I am trying to visualize the correlation of one variable (cl_wet) with 16 other variables in R using the corrplot package. My data contains a fair number of NA values, but I have been able to omit them in the correlation code I have used before. It just won't work for the visualization:
Here is the code I have been running:
# Correlate cl_wet with columns 2 through 17 of the same data frame.
# NOTE(review): `liquid.wet` is not defined in the sample below, which creates
# `liquid.dataset` -- presumably the same data; confirm before running.
liqcor <- cor(x = liquid.wet$cl_wet, y = liquid.wet[2:17], use = "complete.obs")
# Plot the resulting correlation matrix; this is the call that raises the
# errors quoted below.
corrplot(liqcor)
And I have been receiving these errors:
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
Warning in min(corr, na.rm = TRUE) :
no non-missing arguments to min; returning Inf
Warning in max(corr, na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Error in symbols(Pos, add = TRUE, inches = FALSE, rectangles = matrix(1, :
invalid symbol coordinates
Any advice is appreciated! Below is a sample of my data for reproducibility:
# Sample data for reproducing the problem: a data frame with 25 rows and
# 17 columns (cl_wet plus 16 other measurements), built with structure().
# Missing values are uneven across columns:
#   - cl_wet has no NA values;
#   - Moisture through K2O are NA only in rows 18-19;
#   - Na through S are NA in rows 6-10, 12 and 18-19;
#   - NH3 and NO3 have values only in rows 15-17;
#   - TC is NA in every row.
liquid.dataset <-
structure(
list(
cl_wet = c(
0.15738,
0.07897,
0.21313,
0.20552,
0.21005,
0.3,
0.30583,
0.29432,
0.22091,
0.14322,
0.17247,
0.29264,
0.12911,
0.2439,
0.32264,
0.333,
0.4097,
0.1386,
0.25436,
0.52432,
0.44101,
0.20917,
0.14436,
0.17538,
0.13455
),
Moisture = c(
95,
98,
95,
96,
95,
93,
89,
91,
88,
96,
96,
93,
96,
91,
89,
92,
88,
NA,
NA,
89,
89,
96,
96,
96,
97
),
Dry.matter = c(
5L,
2L,
5L,
4L,
5L,
7L,
11L,
9L,
12L,
4L,
4L,
7L,
4L,
9L,
11L,
8L,
12L,
NA,
NA,
11L,
11L,
4L,
4L,
4L,
3L
),
TN = c(
0.530443645,
0.28263789,
0.512529976,
0.497601918,
0.491630695,
0.666786571,
0.745407674,
0.723513189,
0.980275779,
0.617026379,
0.330407674,
0.719532374,
0.447841727,
0.768297362,
1.062877698,
0.91558753,
1.188273381,
NA,
NA,
0.878764988,
0.860851319,
0.468741007,
0.436894484,
0.371211031,
0.301546763
),
P2O5 = c(
0.179856115,
0.082733813,
0.179856115,
0.191846523,
0.191846523,
0.167865707,
0.179856115,
0.20383693,
0.383693046,
0.167865707,
0.101918465,
0.287769784,
0.143884892,
0.251798561,
0.419664269,
0.35971223,
0.575539568,
NA,
NA,
0.323741007,
0.35971223,
0.116306954,
0.143884892,
0.131894484,
0.10911271
),
K2O = c(
0.275779376,
0.1558753,
0.347721823,
0.323741007,
0.335731415,
0.419664269,
0.431654676,
0.431654676,
0.503597122,
0.347721823,
0.275779376,
0.455635492,
0.251798561,
0.419664269,
0.575539568,
0.575539568,
0.815347722,
NA,
NA,
0.539568345,
0.551558753,
0.383693046,
0.239808153,
0.227817746,
0.287769784
),
Na = c(
0.065947242,
0.037170264,
0.088729017,
0.082733813,
0.085131894,
NA,
NA,
NA,
NA,
NA,
0.082733813,
NA,
0.049160671,
0.100719424,
0.179856115,
0.179856115,
0.251798561,
NA,
NA,
0.143884892,
0.143884892,
0.088729017,
0.053956835,
0.076738609,
0.088729017
),
Ca = c(
0.083932854,
0.049160671,
0.075539568,
0.087529976,
0.086330935,
NA,
NA,
NA,
NA,
NA,
0.080335731,
NA,
0.085131894,
0.09352518,
0.131894484,
0.131894484,
0.167865707,
NA,
NA,
0.112709832,
0.117505995,
0.052757794,
0.073141487,
0.086330935,
0.083932854
),
Mg = c(
0.059952038,
0.034772182,
0.07793765,
0.081534772,
0.081534772,
NA,
NA,
NA,
NA,
NA,
0.064748201,
NA,
0.050359712,
0.098321343,
0.119904077,
0.10911271,
0.167865707,
NA,
NA,
0.215827338,
0.227817746,
0.047961631,
0.056354916,
0.080335731,
0.061151079
),
Zn = c(
0.028776978,
0.013189448,
0.044364508,
0.044364508,
0.045563549,
NA,
NA,
NA,
NA,
NA,
0.004916067,
NA,
0.029976019,
0.007074341,
0.007553957,
0.006235012,
0.009952038,
NA,
NA,
0.006594724,
0.006714628,
0.013189448,
0.023980815,
0.005755396,
0.001115108
),
Fe = c(
0.008992806,
0.004916067,
0.023980815,
0.025179856,
0.026378897,
NA,
NA,
NA,
NA,
NA,
0.01558753,
NA,
0.010551559,
0.00911271,
0.017985612,
0.017985612,
0.022781775,
NA,
NA,
0.013189448,
0.01558753,
0.005635492,
0.008513189,
0.014388489,
0.002877698
),
Mn = c(
0.001918465,
0.000791367,
0.003597122,
0.003717026,
0.00383693,
NA,
NA,
NA,
NA,
NA,
0.001558753,
NA,
0.001798561,
0.001798561,
0.003117506,
0.00323741,
0.003956835,
NA,
NA,
0.003956835,
0.004316547,
0.001318945,
0.001558753,
0.001438849,
0.000683453
),
Cu = c(
0.003117506,
0.002398082,
0.007913669,
0.008393285,
0.008633094,
NA,
NA,
NA,
NA,
NA,
0.001558753,
NA,
0.002517986,
0.006354916,
0.003357314,
0.002278177,
0.004796163,
NA,
NA,
0.001558753,
0.001558753,
0.001438849,
0.00383693,
0.002398082,
0.000959233
),
S = c(
0.071942446,
0.028776978,
0.106714628,
0.107913669,
0.107913669,
NA,
NA,
NA,
NA,
NA,
0.028776978,
NA,
0.037170264,
0.068345324,
0.131894484,
0.131894484,
0.179856115,
NA,
NA,
0.095923261,
0.094724221,
0.081534772,
0.061151079,
0.034772182,
0.023980815
),
NH3 = c(
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
0.851318945,
0.731414868,
0.923261391,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
),
TC = c(
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_,
NA_real_
),
NO3 = c(
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
0.001318945,
7.79e-05,
3.6e-05,
NA,
NA,
NA,
NA,
NA,
NA,
NA,
NA
)
),
row.names = c(NA, 25L),
class = "data.frame"
)
Since you have missing values, and the number of missing values differs between your features, using the `complete.obs` option of the `cor()` function is not recommended: every row containing a missing value is removed completely, so you lose information for pairwise comparisons whose own values are not actually missing. It is better to use the `pairwise.complete.obs` option, where missing values are removed only for the particular pairwise correlation in which they occur. The correlation or covariance between each pair of variables is then computed using all complete pairs of observations on those variables.
# Load the plotting package up front so a missing installation fails before
# any computation is done.
library(corrplot)

# Correlate cl_wet with each of the other 16 columns (columns 2 to 17).
# With use = "pairwise.complete.obs", each coefficient is computed from the
# rows in which both cl_wet and that particular column are observed, rather
# than only from rows that are complete across every column.
# NOTE(review): TC has no observed values in the sample data, so its
# coefficient is expected to be NA -- confirm how corrplot labels it.
liqcor <- cor(
  x = liquid.dataset$cl_wet,
  y = liquid.dataset[2:17],
  use = "pairwise.complete.obs"
)

# Plot the 1 x 16 correlation matrix with the coefficients printed in the
# cells. addgrid.col and addCoef.col are colour arguments, so colours are
# given explicitly instead of relying on a logical being coerced to a
# palette index.
corrplot(
  liqcor,
  addgrid.col = "black",
  type = "upper",
  addCoef.col = "black",
  number.cex = 0.7,
  diag = TRUE,
  tl.cex = 0.9
)