Parsing CASE statements in Scala
CASE WHEN col1 <> 0 AND col2 <> 0 THEN 'COL1 & COL2 IS NOT ZERO' ELSE 'COL1 & COL2 IS ZERO' END
The challenge is to handle every place where a CASE statement can appear; for example, it can appear inside a function call. CASE statements, functions, etc. can also be nested inside other CASE statements, and that has to be handled as well.
This problem can be solved with Scala parser combinators.
First, define the classes needed to represent the expressions:
/** Base type of every parsed expression node.
  *
  * Implementations supply their textual rendering through `asStr`;
  * `toString` delegates to it, so interpolating a node into a string
  * yields that rendering.
  */
sealed trait Exp {

  /** Textual rendering of this expression. */
  def asStr: String

  // Delegate so that string interpolation and `mkString` print the rendering.
  override def toString: String = asStr
}
/** Binary operation `a op b`, optionally followed by the text held in `c`. */
case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
  override def asStr = {
    // An absent `c` contributes an empty string, so the rendering then ends with a space.
    val suffix = c.getOrElse("")
    s"$a $op $b $suffix"
  }
}

/** A CASE expression.
  *
  * @param conditionValue (condition, result) pairs, rendered as `WHEN condition THEN result`
  * @param elseValue      text rendered after `ELSE`
  * @param asAlias        optional expression rendered after `END`
  */
case class CaseConditions(conditionValue: List[(String, String)], elseValue: String, asAlias: Option[Exp]) extends Exp {
  override def asStr = {
    val whenClauses = conditionValue.map { case (condition, result) => s"WHEN $condition THEN $result" }
    // A missing alias renders as an empty string, leaving a trailing space after END.
    s"CASE ${whenClauses.mkString(" ")} ELSE $elseValue END ${asAlias.getOrElse("")}"
  }
}
Now the solution:
// NOTE(review): `OperationExp` and `CaseConditions` are also declared, identically, earlier in
// this file; if both declarations end up in the same scope, one of them has to be removed.

/** Binary operation `a op b`, optionally followed by the text held in `c`. */
case class OperationExp(a: Exp, op: String, b: Exp, c: Option[String]) extends Exp {
  override def asStr = {
    // An absent `c` contributes an empty string, so the rendering then ends with a space.
    val suffix = c.getOrElse("")
    s"$a $op $b $suffix"
  }
}

/** A CASE expression.
  *
  * @param conditionValue (condition, result) pairs, rendered as `WHEN condition THEN result`
  * @param elseValue      text rendered after `ELSE`
  * @param asAlias        optional expression rendered after `END`
  */
case class CaseConditions(conditionValue: List[(String, String)], elseValue: String, asAlias: Option[Exp]) extends Exp {
  override def asStr = {
    val whenClauses = conditionValue.map { case (condition, result) => s"WHEN $condition THEN $result" }
    // A missing alias renders as an empty string, leaving a trailing space after END.
    s"CASE ${whenClauses.mkString(" ")} ELSE $elseValue END ${asAlias.getOrElse("")}"
  }
}
/** A single token: one or more characters from the character class below. */
val identifiers: Parser[String] = "[a-zA-Z0-9_~\\|,'\\-\\+:.()]+".r

/** Comparison operator: a run of `<`, `>`, `=`, `!` characters, or one of the keyword operators. */
val operatorTokens: Parser[String] = {
  val symbolic: Parser[String] = "[<>=!]+".r
  // "IS NOT" is listed before "IS" so that the longer keyword is tried first.
  val keyword: Parser[String] = "IS NOT" | "IN" | "IS"
  symbolic | keyword
}

/** Keyword that may follow a condition to chain it to the next one. */
val conditionJoiner: Parser[String] = "AND" | "OR"

/** Tokens that delimit a CASE expression and therefore must not be read as identifiers. */
val excludeKeywords = List(
  "CASE",
  "WHEN",
  "THEN",
  "ELSE",
  "END"
)
/** Parses one `identifiers` token, rejecting the CASE keywords listed in `excludeKeywords`,
  * and wraps the accepted token in `StrExp`.
  */
val identifierWithoutCaseKw: Parser[Exp] = {
  val nonKeyword: Parser[String] = Parser { in =>
    identifiers(in).filterWithError(
      token => !excludeKeywords.contains(token),
      token => s"$token encountered",
      in // position passed to filterWithError for the rejected token
    )
  }
  nonKeyword ^^ StrExp
}
/** Any (possibly empty) run of characters that contains no parentheses, wrapped in `StrExp`. */
val anyStrExp: Parser[Exp] =
  "[^()]*".r ^^ StrExp

/** A function call: `name` followed by one parenthesised argument.
  *
  * The argument is tried, in order, as a CASE expression, a nested function call,
  * and finally as parenthesis-free text. The result is a `FunCallExp` holding that
  * single argument.
  */
// NOTE(review): `name` is not defined in the visible code - presumably a parser for the function name; confirm.
val funcIdentifier: Parser[Exp] =
  name ~ ("(" ~> (caseConditionExpresionParser | funcIdentifier | anyStrExp) <~ ")") ^^ {
    case function ~ argument => FunCallExp(function, Seq(argument))
  }

/** A function call if one can be parsed, otherwise a plain non-keyword identifier. */
val identifierOrFunctions =
  funcIdentifier | identifierWithoutCaseKw
/** Parses one comparison `<operand> <operator> <operand>`, optionally followed by `AND`/`OR`,
  * and renders it back as a single space-separated string.
  */
val conditionParser: Parser[String] = {
  val comparison =
    identifierOrFunctions ~ operatorTokens ~ identifierOrFunctions ~ opt(conditionJoiner)

  comparison ^^ { case lhs ~ operator ~ rhs ~ joiner =>
    // Without a joiner the rendered condition ends with a single space.
    val suffix = joiner.getOrElse("")
    s"$lhs $operator $rhs $suffix"
  }
}
/** Parses `CASE WHEN <conditions> THEN <value> [WHEN ...] ELSE <value> END [AS <alias>]`
  * into a `CaseConditions`.
  *
  * Each THEN value and the ELSE value is collected as a list of tokens, joined with a
  * single space and handed to `parsePipes`, so that a nested CASE expression or function
  * call is rendered through its own parser. When that nested parse does not succeed the
  * space-joined text is used unchanged.
  *
  * @return a parser producing the (condition, value) pairs, the ELSE value and the optional alias
  */
def caseConditionExpresionParser: Parser[CaseConditions] = {

  // Renders a branch value. The same space-joined text is used for the nested parse attempt
  // and for the fallback, and the nested parser runs only once per value.
  def renderValue(tokens: List[Exp]): String = {
    val text = tokens.mkString(" ")
    parsePipes(text).fold(_ => text, identity)
  }

  val grammar =
    "CASE" ~ rep1("WHEN" ~ rep(conditionParser) ~ "THEN" ~ rep(identifierWithoutCaseKw)) ~
      "ELSE" ~ rep(identifierWithoutCaseKw) ~ "END" ~ opt("AS" ~> identifierWithoutCaseKw)

  grammar ^^ {
    // The keyword literals carry no information once matched, so they are ignored here.
    case _ ~ branches ~ _ ~ elseTokens ~ _ ~ alias =>
      val conditionValue = branches.map {
        case _ ~ conditions ~ _ ~ values => (conditions.mkString(" "), renderValue(values))
      }
      CaseConditions(conditionValue, renderValue(elseTokens), alias)
  }
}
/** Entry-point parser: this parser can be used to get the results.
  * It accepts a CASE expression, or otherwise a function call.
  */
val caseExpression: Parser[Exp] =
  caseConditionExpresionParser | funcIdentifier
/** Runs `caseExpression` over `input`.
  *
  * @param input text to parse
  * @return `Right` with the rendering (`asStr`) of the parsed expression, or `Left` with a
  *         single `ParsingError` describing the failure
  */
// NOTE(review): this uses `parse`, not `parseAll`; presumably text left over after a
// successful match is ignored - confirm that this is intended.
def parsePipes(input: String): Either[Seq[ParsingError], String] =
  parse(caseExpression, input) match {
    case Success(parsed, _) =>
      Right(parsed.asStr)
    // The messages report the text that was actually passed in.
    case Failure(msg, next) =>
      Left(Seq(ParsingError(s"Failed to parse $input: $msg, next: ${next.source}.")))
    case Error(msg, next) =>
      Left(Seq(ParsingError(s"Error in $input parse: $msg, next: ${next.source}.")))
  }