Here is my data:
## Data
datex <- c(rep("2021-01-18", 61), rep("2021-01-19", 125))
hourx <- c(0,1,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,16,10,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,11,0,0,0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,3,3,3,3,3,3,3,4,4,4,4,4,4,4,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8,9,9,9,9,9,9,9,9,10,10,10,10,10,10,10,10,11,11,11,11,11,11,11,11,11,11,12,12,12,12,12,12,12,12,13,13,13,13,13,13,13,13,14,14,14,14,14,14,14,14,14,15,15,15,15,16,16,16,16)
seller <- c("dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2",
"dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2","dombsdpapp1","dombsdpapp1","dombsdpapp2","dombsdpapp2")
# Product code for each row. Every entry must be a complete 8-character string:
# a literal broken across two lines would embed a newline in the value and stop
# that row from matching `product == "00021460"`.
product <- c(
  "00021460","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460","00021459",
  "00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460",
  "00021459","00021460","00021459","00021459","00021460","00021459","00021460","00021460","00021460","00021459",
  "00021459","00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021459",
  "00021460","00021459","00021460","00021459","00021460","00021460","00021459","00021459","00021460","00021460",
  "00021459","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459","00021460",
  "00021459","00021459","00021460","00021459","00021459","00021459","00021460","00021459","00021459","00021460",
  "00021460","00021459","00021459","00021460","00021460","00021459","00021460","00021460","00021460","00021459",
  "00021459","00021460","00021459","00021459","00021460","00021459","00021460","00021460","00021459","00021460",
  "00021459","00021460","00021459","00021459","00021460","00021460","00021460","00021460","00021459","00021459",
  "00021460","00021459","00021459","00021460","00021460","00021459","00021459","00021459","00021460","00021460",
  "00021459","00021460","00021459","00021460","00021459","00021459","00021459","00021460","00021460","00021460",
  "00021460","00021459","00021459","00021459","00021459","00021460","00021460","00021459","00021459","00021460",
  "00021460","00021459","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021460",
  "00021459","00021460","00021459","00021460","00021460","00021459","00021460","00021459","00021460","00021459",
  "00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021460","00021459","00021460",
  "00021459","00021459","00021459","00021460","00021460","00021459","00021459","00021460","00021460","00021460",
  "00021459","00021459","00021460","00021459","00021459","00021459","00021460","00021460","00021460","00021460",
  "00021460","00021460","00021460","00021460","00021460","00021460"
)
detail <- c("E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","E99","notEnoughBalance","E99","success","success","success","E99","success","success","E99","success","E99","success","E99","E99","success","E99","E99","success","E99","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","success","E99","success","E99","success","E99","success","success","E99","E99","E99","success","E99","success","success","E99","E99","success","E99","success","E99","success","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","success","E99","success","E99","E99","success","success","E99","E99","success","E99","success","success","E99","E99","E99","success","success","notEnoughBalance","E99","success","success","E99","success","E99","success","notEnoughBalance","E99","success","E99","E99","success","E99","success","success","E99","success","E99","E99","success","E99","success","success","E99","success","success","E99","E99","success","notEnoughBalance","E99","E99","success","E99","success","success","E99","E99","success","success","E99")
status <- c("FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","FI04","OK01","FI04","OK00","OK00","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","OK00","FI04","FI04","OK00","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","FI04","OK00","OK00","OK01","FI04","OK00","OK00","FI04","OK00","FI04","OK00","OK01","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","FI04","FI04","OK00","FI04","OK00","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK01","FI04","FI04","OK00","FI04","OK00","OK00","FI04","FI04","OK00","OK00","FI04")
channel <- c("f2","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f3","f2","f3","f2","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f3","f2","f3","f3","f2","f3","f3","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f2","f2","f3","f3","f2","f3","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f2","f2","f3","f3","f2","f3","f3","f2","f2","f3","f3","f3","f2","f2","f3","f2","f3","f2","f3","f3","f3","f2","f2","f2","f2","f3","f3","f3","f3","f2","f2","f3","f3","f2","f2","f3","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f2","f3","f2","f3","f2","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f2","f3","f3","f3","f2","f2","f3","f3","f2","f2","f2","f3","f3","f2","f3","f3","f3","f2","f2","f2","f2","f2","f2","f2","f2","f2","f2")
transaction <- c(1,120,50,5,1,2,1,9,6,12,5,25,14,6,22,9,10,14,15,12,220,12,12,14,9,11,100,90,110,12,13,4,3,1,2,3,3,5,7,5,5,6,9,16,8,13,10,20,15,18,10,19,15,5,13,12,10,12,26,14,0,4,0,0,0,2,0,0,2,0,4,0,6,8,0,2,3,0,2,0,1,0,1,0,2,0,0,2,1,1,0,0,3,0,1,0,3,0,0,6,5,2,0,8,0,0,12,11,0,2,0,11,0,0,14,21,0,0,13,7,0,17,0,0,18,0,7,0,4,4,0,0,7,12,0,13,0,0,130,160,9,0,0,0,16,0,0,16,0,14,0,0,9,0,11,8,0,8,0,0,8,0,10,5,0,15,0,0,3,0,0,8,8,0,0,6,5,0,8,0,0,5,1,0,0,95)
# Combine the eight parallel vectors into one data frame, one column per vector.
mydatax <- data.frame(datex, hourx, seller, product, detail, status, channel, transaction)
My task is to find outliers for every combination in my data using the tsoutliers package. As an example, I use two combinations. 1st combination:
# Process 1: positive outliers for a single
# seller / product / detail / status / channel combination.
library(tsoutliers)

# Rows of mydatax that belong to this combination.
combination1 <- subset(
  mydatax,
  seller == "dombsdpapp1" & product == "00021460" & detail == "E99" &
    status == "FI04" & channel == "f2"
)

# Run tso() on the transaction values, treated as a time series.
model.anomaly1 <- tso(as.ts(combination1$transaction))

# `ind` (the second column of the outlier table) is used below as a row
# position within the combination; only positive-coefficient outliers are kept.
find.anomaly.index1 <- with(model.anomaly1$outliers, ind[which(coefhat > 0)])

data.anomaly1 <- combination1[find.anomaly.index1, ]
data.anomaly1
#datex hourx seller product detail status channel transaction
#2 2021-01-18 1 dombsdpapp1 00021460 E99 FI04 f2 120
#27 2021-01-18 14 dombsdpapp1 00021460 E99 FI04 f2 100
#29 2021-01-18 16 dombsdpapp1 00021460 E99 FI04 f2 110
#139 2021-01-19 10 dombsdpapp1 00021460 E99 FI04 f2 130
2nd combination:
# Process 2: positive outliers for a second
# seller / product / detail / status / channel combination.
library(tsoutliers)

# Rows of mydatax that belong to this combination.
combination2 <- subset(
  mydatax,
  seller == "dombsdpapp2" & product == "00021460" & detail == "E99" &
    status == "FI04" & channel == "f2"
)

# Run tso() on the transaction values, treated as a time series.
model.anomaly2 <- tso(as.ts(combination2$transaction))

# `ind` (the second column of the outlier table) is used below as a row
# position within the combination; only positive-coefficient outliers are kept.
find.anomaly.index2 <- with(model.anomaly2$outliers, ind[which(coefhat > 0)])

data.anomaly2 <- combination2[find.anomaly.index2, ]
data.anomaly2
#datex hourx seller product detail status channel transaction
#140 2021-01-19 10 dombsdpapp2 00021460 E99 FI04 f2 160
#186 2021-01-19 16 dombsdpapp2 00021460 E99 FI04 f2 95
After that, the results of all iterations are combined into one table:
# Stack the outlier rows found for both combinations into one data frame.
my.anomaly.result <- do.call(rbind, list(data.anomaly1, data.anomaly2))
my.anomaly.result
# datex hourx seller product detail status channel transaction
#2 2021-01-18 1 dombsdpapp1 00021460 E99 FI04 f2 120
#27 2021-01-18 14 dombsdpapp1 00021460 E99 FI04 f2 100
#29 2021-01-18 16 dombsdpapp1 00021460 E99 FI04 f2 110
#139 2021-01-19 10 dombsdpapp1 00021460 E99 FI04 f2 130
#140 2021-01-19 10 dombsdpapp2 00021460 E99 FI04 f2 160
#186 2021-01-19 16 dombsdpapp2 00021460 E99 FI04 f2 95
The painful point is: how do I loop over the whole process to get all results using dplyr? I have 100K combinations. Thank you.
In the data there are certain groups with only 1 or 2 rows. For such groups the `tso`
function returns an error. I have written a custom function in which I have set a threshold of 5 rows: if a group has fewer than 5 rows, all the rows of that group are selected; for the rest of the groups we apply the function. You can adjust this 5 to any number as per your data.
library(dplyr)
library(tsoutliers)
#' Positions of positive outliers in a numeric series
#'
#' Runs `tso()` on `x` (converted with `as.ts()`) and returns the `ind` values
#' of the detected outliers whose estimated coefficient (`coefhat`) is
#' positive. Series shorter than `min_obs` are not modelled; every position of
#' such a series is returned instead.
#'
#' @param x Vector of observations for one group.
#' @param min_obs Minimum number of observations required before `tso()` is
#'   called. Defaults to 5, the threshold that used to be hard-coded.
#' @return Vector of positions in `x`, suitable for `dplyr::slice()`.
get_outlier_index <- function(x, min_obs = 5) {
  # tso() errors on groups with only one or two rows, so short groups are
  # returned whole rather than modelled.
  if (length(x) < min_obs) {
    return(seq_along(x))
  }
  outliers <- tso(as.ts(x))$outliers
  # which() drops an NA comparison instead of producing an NA index.
  outliers$ind[which(outliers$coefhat > 0)]
}
# Group rows by each combination of the five key columns, keep only the rows
# that get_outlier_index() selects within each group, then drop the grouping.
mydatax %>%
  group_by(seller, product, detail, status, channel) %>%
  slice(get_outlier_index(transaction)) %>%
  ungroup()
# datex hourx seller product detail status channel transaction
# <chr> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
# 1 2021-01-18 7 dombsdpapp1 00021459 E99 FI04 f3 25
# 2 2021-01-18 11 dombsdpapp1 00021459 E99 FI04 f3 220
# 3 2021-01-19 5 dombsdpapp1 00021459 E99 FI04 f3 6
# 4 2021-01-18 10 dombsdpapp1 00021459 notEnoughBalance OK01 f3 12
# 5 2021-01-18 1 dombsdpapp1 00021460 E99 FI04 f2 120
# 6 2021-01-18 14 dombsdpapp1 00021460 E99 FI04 f2 100
# 7 2021-01-18 16 dombsdpapp1 00021460 E99 FI04 f2 110
# 8 2021-01-19 10 dombsdpapp1 00021460 E99 FI04 f2 130
# 9 2021-01-19 11 dombsdpapp1 00021460 notEnoughBalance OK01 f2 0
#10 2021-01-18 11 dombsdpapp2 00021459 notEnoughBalance OK01 f3 0
#11 2021-01-19 14 dombsdpapp2 00021459 notEnoughBalance OK01 f3 0
#12 2021-01-19 10 dombsdpapp2 00021460 E99 FI04 f2 160
#13 2021-01-19 16 dombsdpapp2 00021460 E99 FI04 f2 95
#14 2021-01-19 11 dombsdpapp2 00021460 notEnoughBalance OK01 f2 0