I have a vector which looks like this
1 -1 1 1 -1 -1 -1 1 -1
I need to get the groups of 1s and -1s, i.e.
1, -1
, 1, 1, -1, -1, -1
, 1, -1
with the eventual aim of getting the starting position of these groups and the end position, i.e.
starting positions: 1, 3, 8
and end positions: 3, 8, 10
rle()
doesn't quite get to what I want
r <- rle(c(1, -1, 1, 1, -1, -1, -1, 1, -1))
which(rep(x = diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2, times = r$lengths))
# [1] 1 3 4 8
So here, I don't want the 4
.
The vector I have given as an example may not cover all my bases. It is entirely plausible I have 0
s in the vector too. This would never intersect the -1
and 1
, e.g.
0 0 1 -1 1 1 -1 -1 -1 1 -1
or
1 -1 1 1 -1 -1 -1 1 -1 0 0
or
0 0 1 -1 1 1 -1 -1 -1 1 -1 0 0
or
1 -1 0 0 1 1 -1 -1 -1 1 -1
could all be valid.
EDIT: The full code which I am trying to replace is
xc <- paste(as.character(sign(diff(x))), collapse = "")
xc <- gsub("1", "+", gsub("-1", "-", xc))
xc <- gsub("0", zero, xc)
peakpat <- sprintf("[+]{%d,}[-]{%d,}", nups, ndowns)
rc <- gregexpr(peakpat, xc)[[1]]
if (rc[1] < 0) return(NULL)
x1 <- rc
x2 <- rc + attr(rc, "match.length")
attributes(x1) <- NULL
attributes(x2) <- NULL
(see pracma::findpeaks()
). This code is very slow so I am trying to solve it using integers. So far I have
xc2 <- sign(diff(x))
changepoints <- cumsum(abs(c(1, diff(xc2) != 0)))
group_size <- tabulate(changepoints)
lag <- c(NA, group_size[seq_len(length(group_size) - 1)])
lead <- c(group_size[2:length(group_size)], NA)
rc1 <- rep(0L, length(xc2))
rc1[xc2 == 1 & rep(group_size >= nups, group_size) & rep(lead >= ndowns, group_size)] <- 1L
rc1[xc2 == -1 & rep(group_size >= ndowns, group_size) & rep(lag >= nups, group_size)] <- -1L
rc1_backup <- rc1
rc1[rc1 == 0] <- -1
x1 <- which(diff(c(0, rc1)) > 0)
rc1 <- rc1_backup
rc1[rc1 == 0] <- 1
x2 <- which(diff(c(rc1, 0)) > 0) + 1
Which almost works...but not quite.
In the end, rle()
gave me what I needed. I just needed the correct subsets.
rc <- rle(xc2)
vals <- rc$values
lens <- rc$lengths
rc_len <- length(rc$lengths)
lead <- c(vals[2:rc_len], NA)
lead_lens <- c(lens[2:rc_len], NA)
pos_peak_start <- which(vals == 1 & lens >= nups & lead == -1 & lead_lens >= ndowns)
x1 <- (cumsum(c(0, lens)) + 1)[pos_peak_start]
lag <- c(NA, vals[-rc_len])
lag_lens <- c(NA, lens[-rc_len])
pos_peak_end <- which(vals == -1 & lens >= ndowns & lag == 1 & lag_lens >= nups)
x2 <- (cumsum(lens) + 1)[pos_peak_end]