I have a dataframe with 59 rows and 371 columns. Rows are my observations/sites, and columns are imagery with a time stamp as column header. After sorting and cleaning the column headers I am left with this:
data<-structure(list(X20151126 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0.277672673867523, 0.355025896133641, NA, NA, NA), X20151126.1 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.278084206794621, 0.355967923936499,
NA, NA, NA), X20151126.2 = c(NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, 0.277672673867523, 0.355025896133641, NA, NA, NA), X20151126.3 = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 0.278084206794621, 0.355967923936499,
NA, NA, NA), X20151216 = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, 0.384717953500922, 0.476687361663067, 0.369193020073217,
0.509256980090469, 0.695446322082805), X20151216.1 = c(0.482005639964749,
0.477315968778509, 0.577629441578537, 0.521768662684214, 0.403182719183149,
0.495476052715638, 0.449110279438877, 0.572210709159168, 0.639095940861963,
0.663725301936293, 0.385697051441031, 0.476921890053818, 0.369067776922609,
0.509928156203107, 0.696681651400943), X20160105 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.414471298501795, 0.476356388531503,
0.363677767527229, 0.536833655761341, 0.627173043983676), X20160105.1 = c(0.557391753621561,
0.545031565171865, 0.611938234209565, 0.552637066670738, 0.462370657856108,
0.514062089559983, 0.517862730716598, 0.607005393447421, 0.62782746269337,
0.669313073182483, 0.415045773658901, 0.476891950837264, 0.364111066602943,
0.535738794288108, 0.627583094107998), X20160305 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.608603344509915, 0.635371377161601,
0.609235980874035, 0.55732646087173, 0.55009492623478), X20160305.1 = c(0.663171088076713,
0.594729693525975, 0.624064072126327, 0.60486680369263, 0.557099508853904,
0.525207209646858, 0.597555736004227, 0.55009547536139, 0.596528841673565,
0.609200814692677, 0.609410463440908, 0.635462899436559, 0.608558438182846,
0.557738728019745, 0.550279584646311), X20160315 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.643538550411662, 0.662874802494349,
0.641220282780279, 0.548161668675193, 0.574401192865252), X20160315.1 = c(0.668475363330821,
0.598489967449372, 0.627731925878224, 0.598095603148813, 0.567868324796377,
0.310253447067502, 0.628817539418026, 0.585246425272493, 0.562730022483348,
0.601923400619283, 0.644128383766072, 0.663057419883037, 0.640840244150391,
0.547853260818411, 0.57379435997677), X20160315.2 = c(NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, 0.643538550411662, 0.662874802494349,
0.641220282780279, 0.548161668675193, 0.574401192865252), X20160315.3 = c(0.668475363330821,
0.598489967449372, 0.627731925878224, 0.598095603148813, 0.567868324796377,
0.310253447067502, 0.628817539418026, 0.585246425272493, 0.562730022483348,
0.601923400619283, 0.644128383766072, 0.663057419883037, 0.640840244150391,
0.547853260818411, 0.57379435997677), X20160325 = c(NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, 0.617099941392511, 0.64324564948683,
0.610466453170061, 0.524114324562118, 0.558504551047362)), row.names = c("Site_00",
"Site_01", "Site_02", "Site_03", "Site_04", "Site_05", "Site_06",
"Site_09", "Site_10", "Site_11", "Site_12", "Site_13", "Site_16",
"Site_17", "Site_18"), class = "data.frame")
Some imagery, but not all, have multiples over the same time stamp e.g. X20151126
, X20151126.1
and X20151126.2
. However, not all of these multiples have values, as seen by the NA
's.
I would like to create a new data frame where I reduce these multiples by calculating the mean (and where it only has NA
I would like to keep NA
). Ultimately, I would like this:
x20151126<-apply(data[grep("20151126",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
x20151216<-apply(data[grep("20151216",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
x20160105<-apply(data[grep("20160105",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
x20160305<-apply(data[grep("20160305",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
x20160315<-apply(data[grep("20160315",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
x20160325<-apply(data[grep("20160325",colnames(data), value=T)],
MARGIN=1,FUN=mean,na.rm=TRUE)
NewData<-as.data.frame(cbind(x20151126,x20151216,x20160105,x20160305,x20160315,x20160325))
Is there a way to automate this code, without having to specify all the column headers using grep
? Like I mentioned, I have 371 column headers, and not all of them have multiples. Also, I have 9 different tables that all look similar to this.
Any suggestions or help would be appreciated.
You can use split.default
to split data into similar named columns and use rowMeans
to calculate row-wise mean for each list.
sapply(split.default(data, sub('\\..*', '', names(data))), rowMeans, na.rm = TRUE)
# X20151126 X20151216 X20160105 X20160305 X20160315 X20160325
#Site_00 NaN 0.4820056 0.5573918 0.6631711 0.6684754 NaN
#Site_01 NaN 0.4773160 0.5450316 0.5947297 0.5984900 NaN
#Site_02 NaN 0.5776294 0.6119382 0.6240641 0.6277319 NaN
#Site_03 NaN 0.5217687 0.5526371 0.6048668 0.5980956 NaN
#Site_04 NaN 0.4031827 0.4623707 0.5570995 0.5678683 NaN
#Site_05 NaN 0.4954761 0.5140621 0.5252072 0.3102534 NaN
#Site_06 NaN 0.4491103 0.5178627 0.5975557 0.6288175 NaN
#Site_09 NaN 0.5722107 0.6070054 0.5500955 0.5852464 NaN
#Site_10 NaN 0.6390959 0.6278275 0.5965288 0.5627300 NaN
#Site_11 NaN 0.6637253 0.6693131 0.6092008 0.6019234 NaN
#Site_12 0.2778784 0.3852075 0.4147585 0.6090069 0.6438335 0.6170999
#Site_13 0.3554969 0.4768046 0.4766242 0.6354171 0.6629661 0.6432456
#Site_16 NaN 0.3691304 0.3638944 0.6088972 0.6410303 0.6104665
#Site_17 NaN 0.5095926 0.5362862 0.5575326 0.5480075 0.5241143
#Site_18 NaN 0.6960640 0.6273781 0.5501873 0.5740978 0.5585046
Using sub
we keep only common part of column names which is used to split data.
sub('\\..*', '', names(data))
#[1] "X20151126" "X20151126" "X20151126" "X20151126" "X20151216" "X20151216"
#[7] "X20160105" "X20160105" "X20160305" "X20160305" "X20160315" "X20160315"
#[13] "X20160315" "X20160315" "X20160325"