Search code examples
.netf#fparsec

Basic error recovery with FParsec


Assume I've this parser:

let test p str =
    match run p str with
    | Success(result, _, _)   -> printfn "Success: %A" result
    | Failure(errorMsg, _, _) -> printfn "Failure: %s" errorMsg

let myStatement =
    choice (seq [
                pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';';
                pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';';
            ])

let myProgram = many myStatement

test myProgram "2+3;3*4;3*4;" // Success: ['+'; '*'; '*']

Now, "2+3;2*4;3*4;3+3;" will fail with error around 2*4;. But what is best practice if I want both the error for 2*4; and 3+3;? Basically, I want to scan to the nearest ';' but only if there is a fatal error. And if that happens I want to aggregate the errors.

Kind regards, Lasse Espeholt

Update: recoverWith is a nice solution, thanks! But given:

let myProgram = 
    (many1 (myStatement |> recoverWith '�')) <|>% []

test myProgram "monkey"

I would expect to get [] with no errors. Or maybe a bit more "fair":

let myProgram = 
    (attempt (many1 (myStatement |> recoverWith '�'))) <|>% []

Solution

  • FParsec has no built-in support for recovering from fatal parser errors that would allow you to obtain partial parser results and collect errors from multiple positions. However, it's pretty easy to define a custom combinator function for this purpose.

    For example, to recover from errors in your simple statement parser you could define the following recoverWith combinator:

    open FParsec
    
    type UserState = {
        Errors: (string * ParserError) list
    } with
        static member Create() = {Errors = []}
    
    type Parser<'t> = Parser<'t, UserState>
    
    // recover from error by skipping to the char after the next newline or ';'
    let recoverWith errorResult (p: Parser<_>) : Parser<_> =    
      fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p stream
        if reply.Status <> Ok then // the parser failed
            let error = ParserError(stream.Position, stream.UserState, reply.Error)
            let errorMsg = error.ToString(stream)
            stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore                        
            stream.ReadCharOrNewline() |> ignore
            // To prevent infinite recovery attempts in certain situations,
            // the following check makes sure that either the parser p 
            // or our stream.Skip... commands consumed some input.
            if stream.StateTag <> stateTag then
                let oldErrors = stream.UserState.Errors
                stream.UserState <- {Errors = (errorMsg, error)::oldErrors}     
                reply <- Reply(errorResult)
        reply
    

    You could then use this combinator as follows:

    let myStatement =
        choice [
            pchar '2' >>. pchar '+' .>> pchar '3' .>> pchar ';'
            pchar '3' >>. pchar '*' .>> pchar '4' .>> pchar ';'
        ]
    
    let myProgram = 
        many (myStatement |> recoverWith '�') .>> eof
    
    let test p str =
        let printErrors (errorMsgs: (string * ParserError) list) =        
            for msg, _ in List.rev errorMsgs do
                printfn "%s" msg        
    
        match runParserOnString p (UserState.Create()) "" str with
        | Success(result, {Errors = []}, _) -> printfn "Success: %A" result
        | Success(result, {Errors = errors}, _) ->
            printfn "Result with errors: %A\n" result
            printErrors errors
        | Failure(errorMsg, error, {Errors = errors}) -> 
            printfn "Failure: %s" errorMsg
            printErrors ((errorMsg, error)::errors)
    

    Testing with test myProgram "2+3;2*4;3*4;3+3" would yield the output:

    Result with errors: ['+'; '�'; '*'; '�']
    
    Error in Ln: 1 Col: 6
    2+3;2*4;3*4;3+3
         ^
    Expecting: '+'
    
    Error in Ln: 1 Col: 14
    2+3;2*4;3*4;3+3
                 ^
    Expecting: '*'
    

    Update:

    Hmm, I thought you wanted to recover from a fatal error in order to collect multiple error messages and maybe produce a partial result. Something that would for example be useful for syntax highlighting or allowing your users to fix more than one error at a time.

    Your update seems to suggest that you just want to ignore parts of the input in case of a parser error, which is much simpler:

    let skip1ToNextStatement =
        notEmpty // requires at least one char to be skipped
            (skipManySatisfy (fun c -> c <> ';' && c <> '\n') 
             >>. optional anyChar) // optional since we might be at the EOF
    
    let myProgram =     
        many (attempt myStatement <|> (skip1ToNextStatement >>% '�'))
        |>> List.filter (fun c -> c <> '�')
    

    Update 2:

    The following is a version of recoverWith that doesn't aggregate errors and only tries to recover from an error if the argument parser consumed input (or changed the parser state in any other way):

    let recoverWith2 errorResult (p: Parser<_>) : Parser<_> =
      fun stream ->
        let stateTag = stream.StateTag
        let mutable reply = p stream
        if reply.Status <> Ok && stream.StateTag <> stateTag then
            stream.SkipCharsOrNewlinesWhile(fun c -> c <> ';' && c <> '\n') |> ignore
            stream.ReadCharOrNewline() |> ignore
            reply <- Reply(errorResult)
        reply