Search code examples
regex, scala, parser-combinators

Parsing case statements in scala


Parsing case statements in scala

CASE WHEN col1 <> 0 AND col2 <> 0 THEN 'COL1 & COL2 IS NOT ZERO' ELSE 'COL1 & COL2 IS ZERO'

The challenge is to cover every context in which a CASE statement can appear; for example, it can appear inside a function call. CASE statements, function calls, etc. can also be nested inside another CASE statement, and that has to be handled as well.


Solution

  • This problem can be solved with Scala parser combinators.

    First, define the classes needed to represent the expressions:

    /** Root of the expression model; every node can render itself back to text. */
    sealed trait Exp {
      /** Textual rendering of this node. */
      def asStr: String

      // Printing a node (for example through string interpolation) yields its rendering.
      override def toString: String = asStr
    }

    /** An operation `a op b`, followed by the optional trailing token `c` (empty when absent). */
    case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
      override def asStr: String = {
        val trailing = c.getOrElse("")
        s"$a $op $b $trailing"
      }
    }

    /**
     * A CASE expression: `(condition, value)` pairs for the WHEN/THEN branches,
     * the ELSE value, and an optional alias rendered after END.
     */
    case class CaseConditions(conditionValue: List[(String, String)], elseValue: String, asAlias: Option[Exp]) extends Exp {
      override def asStr: String = {
        val branches = conditionValue
          .map { case (condition, value) => s"WHEN $condition THEN $value" }
          .mkString(" ")
        val alias = asAlias.fold("")(_.toString)
        s"CASE $branches ELSE $elseValue END $alias"
      }
    }
    

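    Now the parser itself (it also relies on `StrExp`, `FunCallExp` and a `name` parser, which are not shown in this snippet):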

    // NOTE(review): this repeats the `OperationExp` definition given with the model classes earlier;
    // keep a single copy when assembling the snippets into one file.
    /** An operation `a op b`, followed by the optional trailing token `c` (empty when absent). */
    case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
      override def asStr: String = {
        val trailing = c.getOrElse("")
        s"$a $op $b $trailing"
      }
    }
    
    // NOTE(review): this repeats the `CaseConditions` definition given with the model classes earlier;
    // keep a single copy when assembling the snippets into one file.
    /**
     * A CASE expression: `(condition, value)` pairs for the WHEN/THEN branches,
     * the ELSE value, and an optional alias rendered after END.
     */
    case class CaseConditions(conditionValue: List[(String, String)], elseValue: String, asAlias: Option[Exp]) extends Exp {
      override def asStr: String = {
        val branches = conditionValue
          .map { case (condition, value) => s"WHEN $condition THEN $value" }
          .mkString(" ")
        val alias = asAlias.fold("")(_.toString)
        s"CASE $branches ELSE $elseValue END $alias"
      }
    }
    // Lexical building blocks shared by the parsers below.

      /** One token made of letters, digits and the punctuation characters listed in the character class. */
      val identifiers: Parser[String] = "[a-zA-Z0-9_~\\|,'\\-\\+:.()]+".r

      /** A comparison operator: a run of symbolic characters, or one of the keyword operators. */
      val operatorTokens: Parser[String] = {
        val symbolic: Parser[String] = "[<>=!]+".r
        // "IS NOT" is listed before "IS" so the longer keyword is tried first.
        val keyword: Parser[String] = "IS NOT" | "IN" | "IS"
        symbolic | keyword
      }

      /** Keyword that chains one condition to the next. */
      val conditionJoiner: Parser[String] = "AND" | "OR"

      /** Words that delimit a CASE expression and so are rejected as plain identifiers. */
      val excludeKeywords: List[String] = List(
        "CASE",
        "WHEN",
        "THEN",
        "ELSE",
        "END"
      )
    
    
      /**
       * An identifier token that is not one of `excludeKeywords`, wrapped with `StrExp`.
       * A token equal to an excluded keyword fails with the message "<token> encountered".
       */
      val identifierWithoutCaseKw: Parser[Exp] = {
        val nonKeyword: Parser[String] = Parser { in =>
          identifiers(in).filterWithError(
            token => !excludeKeywords.contains(token),
            token => s"$token encountered",
            in
          )
        }
        nonKeyword ^^ StrExp
      }
    
      /** Any (possibly empty) run of characters other than parentheses, wrapped with `StrExp`. */
      val anyStrExp: Parser[Exp] = regex("[^()]*".r) ^^ StrExp
    
    
    
    
      /**
       * A function call `name(argument)`. The argument is tried, in order, as a CASE
       * expression, a nested function call, or free text without parentheses.
       */
      val funcIdentifier: Parser[Exp] = {
        // Must stay a `def`: it refers back to `funcIdentifier`, which is not yet assigned
        // while this initializer runs, so it may only be evaluated lazily at parse time.
        def argument: Parser[Exp] = caseConditionExpresionParser | funcIdentifier | anyStrExp

        name ~ ("(" ~> argument <~ ")") ^^ { case fn ~ arg => FunCallExp(fn, Seq(arg)) }
      }
    
      /** An operand: a function call when one can be parsed, otherwise a plain non-keyword identifier. */
      val identifierOrFunctions: Parser[Exp] = funcIdentifier | identifierWithoutCaseKw
    
      /**
       * One condition `operand operator operand`, optionally followed by AND/OR,
       * rendered as a space-separated string (with a trailing space when no joiner follows).
       */
      val conditionParser: Parser[String] = {
        val comparison = identifierOrFunctions ~ operatorTokens ~ identifierOrFunctions ~ opt(conditionJoiner)

        comparison ^^ { case left ~ operator ~ right ~ joiner =>
          val suffix = joiner.getOrElse("")
          s"$left $operator $right $suffix"
        }
      }
    
      /**
       * Parses `CASE WHEN <conditions> THEN <value> ... ELSE <value> END [AS <alias>]`
       * into a [[CaseConditions]].
       *
       * Each THEN/ELSE value is re-parsed with `parsePipes`; when that succeeds its rendered
       * result is used, otherwise the space-joined raw tokens are kept unchanged.
       */
      def caseConditionExpresionParser: Parser[CaseConditions]  = {
        // Build the text once so the string that is checked is the same string that is used.
        // THEN and ELSE values share this helper, so both are joined with single spaces and
        // neither can hit `.right.get` on a failed parse.
        def render(tokens: List[Exp]): String = {
          val text = tokens.mkString(" ")
          parsePipes(text).fold(_ => text, identity)
        }

        "CASE" ~ rep1("WHEN" ~ rep(conditionParser) ~ "THEN" ~ rep(identifierWithoutCaseKw)) ~ "ELSE" ~ rep(identifierWithoutCaseKw) ~ "END" ~ opt("AS" ~> identifierWithoutCaseKw) ^^ {
          case _ ~ whenBranches ~ _ ~ elseTokens ~ _ ~ asName =>
            CaseConditions(
              whenBranches.map { case _ ~ conditions ~ _ ~ thenTokens =>
                (conditions.mkString(" "), render(thenTokens))
              },
              render(elseTokens),
              asName
            )
        }
      }
    // Entry-point parser: use this one to obtain results.
      /** A CASE expression when one can be parsed, otherwise a function call. */
      val caseExpression: Parser[Exp] = caseConditionExpresionParser | funcIdentifier
    
    /**
     * Runs `caseExpression` over `input`.
     *
     * @param input the text to parse
     * @return `Right` with the rendered (`asStr`) form of the parsed expression, or `Left`
     *         with a single `ParsingError` describing the failure or error
     */
    def parsePipes(input: String): Either[Seq[ParsingError], String] =
      // NOTE(review): `parse` is used rather than `parseAll`, so a match on a leading part of
      // `input` presumably counts as success — confirm that is intended.
      parse(caseExpression, input) match {
        case Success(parsed, _) => Right(parsed.asStr)
        // The messages report the `input` parameter; the name interpolated before was not defined here.
        case Failure(msg, next) => Left(Seq(ParsingError(s"Failed to parse $input: $msg, next: ${next.source}.")))
        case Error(msg, next)   => Left(Seq(ParsingError(s"Error in $input parse: $msg, next: ${next.source}.")))
      }