Search code examples
rdataframesequencerules

find a sequence of rules in a dataframe, with break rules


I showed how I see the implementation of this algorithm, I divided it into two steps

step one sequence search

enter image description here

step two check break rules

enter image description here

set.seed(123)
dat <- as.data.frame(matrix(sample(10,60,replace = T),ncol = 3))
colnames(dat) <- LETTERS[1:ncol(dat)]
dat

rule <- c("A==0","A==10 & B==4","C==9","A>10","B<0","C==0","A==5","A>10",
          "B<0","C==0","A==9 & B==9","A>10","B<0","A==10","A==7 & B==5")
action <- c("break","next","next",rep("break",3),"next",rep("break",3),
            "next",rep("break",3) ,"next")

rule <- cbind(rule,action)

enter image description here


Solution

  • I want to say a huge thank you to everyone who tried to help me, as well as for unlimited patience .. But it was impossible to help me because I myself did not fully understand what I wanted. Instead of breaking the question into several parts and asking separately (as it should be), I asked a big difficult question that I could hardly explain to myself.

    I am very very sorry for that. Here is my answer, this is what I wanted to get in the end.

       seq_rule2 <- function(dat , rule ,res.only = TRUE){
      
      # This is a fast function written by Thomas here
      # https://stackoverflow.com/questions/68625542/match-all-logic-rules-with-a-dataframe-need-super-fast-function
      # as an answer to my earlier question. 
      # It takes the rules as a vector and looks for the sequence
      
      
      seq_rule <- function(dat, rule, res.only = TRUE) {
        m <- with(dat, lapply(rule, function(r) eval(str2expression(r))))
        fu <-  function(x, y) {
          k <- which(y)
          ifelse(all(k <= x), NA, min(k[k > x]))
        }
        idx <- na.omit(Reduce( fu, m,init = 0, accumulate = TRUE ))[-1]
        if (!res.only) {
          fidx <- head(idx, length(rule))
          debug.vec <- replace(rep("no", nrow(dat)), fidx, rule[seq_along(fidx)])
          return(cbind(dat, debug.vec))
        }
        length(idx) >= length(rule)
      }
      
      
      
      #if there is only one next rule, then there is no point in continuing to return the FALSE and finish completely
      if(  length(rule$rule[rule$action=="next"]) <= 1  )  return(FALSE)
      
      # STEP 1  
      # run seq_rule  
      yes.next.rule.seq <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = T)
      
      if(res.only==FALSE & yes.next.rule.seq==FALSE) {
        Next <- rep("no",nrow(dat)) 
        Break <- rep("no",nrow(dat)) 
        dat <- cbind(dat,Next=Next, Break=Break)
        return(dat)
      }
      if(res.only==TRUE & yes.next.rule.seq==FALSE)  return(FALSE)
      
      
      # if the seq_rule found the sequence (TRUE) but there are no "break rules" in the "rule",
      # then there is no point in searching for "break rules". Return TRUE and finish completely
      if( length(rule$rule[rule$action=="break"]) == 0  &  yes.next.rule.seq == TRUE) return(TRUE)
      
      # STEP 2
      #looking for break rules in the range between next rules
      
      if(yes.next.rule.seq){
        
        
        #get indices where the "next rules"  triggered in dat  
        deb.vec <- seq_rule(dat = dat , rule = rule$rule[rule$action=="next"] , res.only = F)[,"debug.vec"]
        idx.next.rules <- which(deb.vec!="no")
        
        
        #get indices where the "break rules"  triggered in dat  
        m <- with(dat, lapply(rule$rule[rule$action=="break"], function(r) eval(str2expression(r))))
        idx.break.rules <- unlist(lapply(m,which))
        
        
        # RES the final result is equal to TRUE, 
        # but if a "break rule" is found between the "next rules", 
        # then the RES will be false
        RES <- TRUE  
        
        
        # sliding window of two "next rules"  http://prntscr.com/1qhnzae
        for(i in 2:length(idx.next.rules)){
          temp.range <- idx.next.rules[  (i-1):i  ]
          # Check if there is any "break rule" index between the "next rule" indexes
          break.detect <- any(  idx.break.rules > temp.range[1]   &  idx.break.rules < temp.range[2] )
          if( break.detect )   RES <- FALSE ; break
        }
        
      }
      
      
      if(!res.only) {
        Next <- rep("no",nrow(dat)) ; Next[idx.next.rules] <- "yes"
        Break <- rep("no",nrow(dat)) ; Break[idx.break.rules] <- "yes"
        dat <- cbind(dat,Next=Next, Break=Break)
        return(dat)
      }
      return(RES)
    }
    

    data for to check

    set.seed(963)
    dat <- as.data.frame(matrix(sample(10,30,replace = T),ncol = 3))
    colnames(dat) <- LETTERS[1:ncol(dat)]
    rule <- cbind.data.frame(rule= c("A==9","B==4","C==4","A==4") ,
                             action= c("next","break","break","next"))
    rule <- as.data.frame(rule,stringsAsFactors = F)
    seq_rule2(dat = dat, rule = rule)
    dat
    rule
    

    for example no breaks set.seed(963) http://prntscr.com/1qhprxq

    with break set.seed(930) http://prntscr.com/1qhpv2h