I have two datasets Data and Data1. I want to merge these keeping all difference while adding the numeric values together in the new table for all common rows. Is there any easy tool for this?
head(Data)
contig position variantID refAllele altAllele refCount altCount totalCount lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
1 chr1 905373 . T C 2 4 6 0 0 6 0 0
2 chr1 911428 . C T 1 2 3 0 0 3 0 0
3 chr1 953279 . T C 146 126 272 0 0 273 1 0
4 chr1 962184 . T C 14 15 29 0 0 29 0 0
5 chr1 1024129 . T G 1 0 1 0 0 1 0 0
6 chr1 1039514 . C T 1 1 2 0 0 2 0 0
head(Data1)
contig position variantID refAllele altAllele refCount altCount totalCount lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
1 chr1 905373 . T C 2 3 5 0 0 5 0 0
2 chr1 933024 . C T 1 0 1 0 0 1 0 0
3 chr1 953279 . T C 122 124 246 0 0 248 2 0
4 chr1 962184 . T C 17 21 38 0 0 38 0 0
5 chr1 1022518 . G T 0 1 1 0 0 1 0 0
6 chr1 1024129 . T G 1 2 3 0 0 3 0 0
Wanted Output example
contig position variantID refAllele altAllele refCount altCount totalCount lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
1 chr1 905373 . T C 4 7 11 0 0 11 0 0
2 chr1 911428 . C T 1 2 3 0 0 3 0 0
2 chr1 933024 . C T 1 0 1 0 0 1 0 0
4 chr1 953279 . T C 268 150 518 0 0 521 3 0
As we can see in column position site 905373 is common is added together from column refCount on. while site 911428 and 933024 are both unique to their datasets but inserted into the new dataset. Is their a none painful way of creating the output table?
Data <- structure(list(contig = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1"), position = c(905373L, 911428L, 953279L, 962184L, 1024129L,
1039514L), variantID = c(".", ".", ".", ".", ".", "."), refAllele = c("T",
"C", "T", "T", "T", "C"), altAllele = c("C", "T", "C", "C", "G",
"T"), refCount = c(2L, 1L, 146L, 14L, 1L, 1L), altCount = c(4L,
2L, 126L, 15L, 0L, 1L), totalCount = c(6L, 3L, 272L, 29L, 1L,
2L), lowMAPQDepth = c(0L, 0L, 0L, 0L, 0L, 0L), lowBaseQDepth = c(0L,
0L, 0L, 0L, 0L, 0L), rawDepth = c(6L, 3L, 273L, 29L, 1L, 2L),
otherBases = c(0L, 0L, 1L, 0L, 0L, 0L), improperPairs = c(0L,
0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 6L), class = "data.frame")
Data1 <- structure(list(contig = c("chr1", "chr1", "chr1", "chr1", "chr1",
"chr1"), position = c(905373L, 933024L, 953279L, 962184L, 1022518L,
1024129L), variantID = c(".", ".", ".", ".", ".", "."), refAllele = c("T",
"C", "T", "T", "G", "T"), altAllele = c("C", "T", "C", "C", "T",
"G"), refCount = c(2L, 1L, 122L, 17L, 0L, 1L), altCount = c(3L,
0L, 124L, 21L, 1L, 2L), totalCount = c(5L, 1L, 246L, 38L, 1L,
3L), lowMAPQDepth = c(0L, 0L, 0L, 0L, 0L, 0L), lowBaseQDepth = c(0L,
0L, 0L, 0L, 0L, 0L), rawDepth = c(5L, 1L, 248L, 38L, 1L, 3L),
otherBases = c(0L, 0L, 2L, 0L, 0L, 0L), improperPairs = c(0L,
0L, 0L, 0L, 0L, 0L)), row.names = c(NA, 6L), class = "data.frame")
Here a posibility:
Data
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 2 4 6
#> 2 chr1 911428 . C T 1 2 3
#> 3 chr1 953279 . T C 146 126 272
#> 4 chr1 962184 . T C 14 15 29
#> 5 chr1 1024129 . T G 1 0 1
#> 6 chr1 1039514 . C T 1 1 2
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 6 0 0
#> 2 0 0 3 0 0
#> 3 0 0 273 1 0
#> 4 0 0 29 0 0
#> 5 0 0 1 0 0
#> 6 0 0 2 0 0
Data1
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 2 3 5
#> 2 chr1 933024 . C T 1 0 1
#> 3 chr1 953279 . T C 122 124 246
#> 4 chr1 962184 . T C 17 21 38
#> 5 chr1 1022518 . G T 0 1 1
#> 6 chr1 1024129 . T G 1 2 3
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 5 0 0
#> 2 0 0 1 0 0
#> 3 0 0 248 2 0
#> 4 0 0 38 0 0
#> 5 0 0 1 0 0
#> 6 0 0 3 0 0
aggregate(. ~ contig + position + variantID + refAllele + altAllele, rbind(Data, Data1), sum)
#> contig position variantID refAllele altAllele refCount altCount totalCount
#> 1 chr1 905373 . T C 4 7 11
#> 2 chr1 953279 . T C 268 250 518
#> 3 chr1 962184 . T C 31 36 67
#> 4 chr1 1024129 . T G 2 2 4
#> 5 chr1 911428 . C T 1 2 3
#> 6 chr1 933024 . C T 1 0 1
#> 7 chr1 1039514 . C T 1 1 2
#> 8 chr1 1022518 . G T 0 1 1
#> lowMAPQDepth lowBaseQDepth rawDepth otherBases improperPairs
#> 1 0 0 11 0 0
#> 2 0 0 521 3 0
#> 3 0 0 67 0 0
#> 4 0 0 4 0 0
#> 5 0 0 3 0 0
#> 6 0 0 1 0 0
#> 7 0 0 2 0 0
#> 8 0 0 1 0 0