I have two data sets and would like to find the overlapping (intersecting/common) regions between them. If there is any overlap, I also want to extract the corresponding rows from each of the original tables:
Data A:
chr start end
chr1 25 35
chr1 50 70
chr1 60 85
Data B:
chr start end score
chr1 10 15 24
chr1 55 75 14
chr1 76 82 10
Output tables:
Output 1: the common regions
chr start end
chr1 55 70
chr1 70 75
chr1 76 82
Output 2: rows extracted from data A:
chr start end
chr1 50 70
chr1 60 85
Output 3: rows extracted from data B:
chr start end score
chr1 55 75 14
chr1 76 82 10
I have tried several approaches, but I do not know which one is best:
library(GenomicRanges)
# Turn each input table into a GRanges object: the `chr` column supplies the
# sequence names and the `start`/`end` columns supply the interval bounds.
# Only these three columns are carried over from each table.
enhancer <- GRanges(
  seqnames = dataA$chr,
  ranges = IRanges(start = dataA$start, end = dataA$end)
)
H3K4me1 <- GRanges(
  seqnames = dataB$chr,
  ranges = IRanges(start = dataB$start, end = dataB$end)
)
way 1:
# Way 1: pairwise overlaps between the two GRanges objects.
# findOverlaps() needs the GRanges objects (`enhancer`, `H3K4me1`), not the
# raw data frames they were built from.
hits <- findOverlaps(enhancer, H3K4me1)

# Output 1: the region shared by each overlapping (A, B) pair. pintersect()
# intersects the two sides element by element, so every hit contributes one
# range; the inputs themselves are left unmodified.
common <- pintersect(enhancer[queryHits(hits)], H3K4me1[subjectHits(hits)])
common

# Outputs 2 and 3: the GRanges objects were built row by row from the tables,
# so hit indices are row numbers of the original data frames. Indexing the
# data frames keeps every original column (e.g. `score` in dataB).
# unique() is needed because one row can take part in several overlaps.
dataA[sort(unique(queryHits(hits))), ]
dataB[sort(unique(subjectHits(hits))), ]
way2:
# Way 2: keep the ranges of A that overlap at least one range of B.
# subsetByOverlaps() must be given the GRanges objects, not the data frames.
# The result contains whole ranges from `enhancer`, not the shared sub-regions.
over <- subsetByOverlaps(enhancer, H3K4me1)
way 3:
# Way 3: set-style intersection of the two GRanges objects.
# Called on data frames, intersect() would not compare genomic ranges at all,
# so the GRanges objects are used here.
# NOTE(review): this treats each side as one set of covered positions, so the
# result does not say which A/B pair produced a region - confirm that is
# acceptable, otherwise use findOverlaps() + pintersect().
inter <- intersect(enhancer, H3K4me1)
way 4:
# Way 4: interval join with data.table.
library(data.table)  # provides data.table(), setkey() and foverlaps()

# Key both tables on (chr, start, end); foverlaps() uses the last two key
# columns as the interval bounds and any preceding ones as plain join columns.
groupA <- data.table(dataA)
setkey(groupA, chr, start, end)
groupB <- data.table(dataB)
setkey(groupB, chr, start, end)

# One row per overlapping pair; nomatch = 0 drops rows of groupA that overlap
# nothing. In the result `start`/`end` are paired with `i.start`/`i.end`,
# which hold the bounds of the other interval in each pair.
over <- foverlaps(groupA, groupB, nomatch = 0)

# Shared region of each pair: the larger of the two starts and the smaller of
# the two ends. pmax()/pmin() are the element-wise max/min and, unlike
# ifelse(), return the same type regardless of which side is selected.
over2 <- data.table(
  chr = over$chr,
  start = over[, pmax(start, i.start)],
  end = over[, pmin(end, i.end)]
)
I'm not sure if this is what you want. Would you mind creating a reproducible example, as described here?
library(dplyr)
# Reproducible example data taken from the question.
DataA <- data.frame(
  chr = c("chr1", "chr1", "chr1"),
  start = c(25, 50, 60),
  end = c(35, 70, 85)
)
DataB <- data.frame(
  chr = c("chr1", "chr1", "chr1"),
  start = c(10, 55, 76),
  end = c(15, 75, 82),
  score = c(24, 14, 10)
)

# Pair every row of A with every row of B on the same chromosome. The row
# numbers are carried along so the original rows can be recovered afterwards.
# `start`/`end` exist in both tables and therefore get the _a / _b suffixes.
pairs <- merge(
  cbind(DataA, idx_a = seq_len(nrow(DataA))),
  cbind(DataB, idx_b = seq_len(nrow(DataB))),
  by = "chr",
  suffixes = c("_a", "_b")
)

# Two closed intervals overlap exactly when each one starts no later than the
# other one ends. This also catches B intervals that start before the A
# interval, and it never expands an interval into individual positions.
is_overlap <- pairs$start_a <= pairs$end_b & pairs$start_b <= pairs$end_a
pairs <- pairs[is_overlap, , drop = FALSE]
pairs <- pairs[order(pairs$idx_a, pairs$idx_b), , drop = FALSE]

# Output 1: the shared region of every overlapping (A, B) pair.
common <- data.frame(
  chr = pairs$chr,
  start = pmax(pairs$start_a, pairs$start_b),
  end = pmin(pairs$end_a, pairs$end_b)
)

# Outputs 2 and 3: original rows taking part in at least one overlap. Each
# table is indexed with its own row numbers, so no NA rows can appear and the
# two tables do not have to line up row for row.
overlap_a <- DataA[sort(unique(pairs$idx_a)), , drop = FALSE]
overlap_b <- DataB[sort(unique(pairs$idx_b)), , drop = FALSE]

common
overlap_a
overlap_b