Search code examples
parsingf#commentsmultilinefparsec

fparsec - combinator "many" complains and... why not parse block comments like this?


This question, first off, is not a duplicate of my question. Actually I have 3 questions.

In the code below, I try to create a parser which parses possibly nested multiline block comments. In contrast to the cited other question, I try to solve the problem in a straightforward way without any recursive functions (see the accepted answer to the other post).

The first problem I ran into was that skipManyTill parser of FParsec also consumes the end parser from the stream. So I created skipManyTillEx (Ex for 'excluding endp' ;) ). The skipManyTillEx seems to work - at least for the one test case I also added to the fsx script.

Yet in the code, shown, now I get the "The combinator 'many' was applied to a parser that succeeds without consuming..." error. My theory is, that the commentContent parser is the line which produces this error.

Here, my questions:

  1. Is there any reason, why the approach I have chosen cannot work? The solution in 1, which, unfortunately does not seem to compile on my system uses a recursive low level parser for (nested) multiline comments.
  2. Can anyone see a problem with the way I implemented skipManyTillEx? The way I implemented it differs to some degree from the way skipManyTill is implemented, mostly in the aspect of how to control the parsing flow. In original skipManyTill, the Reply<_> of p and endp is tracked, along with the stream.StateTag. In my implementation, in contrast I did not see the need to use stream.StateTag, solely relying on the Reply<_> status code. In case of an unsuccessful parse, skipManyTillEx backtracks to the streams initial state and reports an error. Could possibly the backtracking code cause the 'many' error? What would I have to do instead?
  3. (and that is the main question) - Does anyone see, how to fix the parser such, that this "many ... " error message goes away?

Here is the code:

#r @"C:\hgprojects\fparsec\Build\VS11\bin\Debug\FParsecCS.dll"
#r @"C:\hgprojects\fparsec\Build\VS11\bin\Debug\FParsec.dll"

open FParsec

let testParser p input =
    match run p input with
    | Success(result, _, _) -> printfn "Success: %A" result
    | Failure(errorMsg, _, _) -> printfn "Failure %s" errorMsg
    input

let Show (s : string) : string =
    printfn "%s" s
    s

let test p i =
    i |> Show |> testParser p |> ignore

////////////////////////////////////////////////////////////////////////////////////////////////
let skipManyTillEx (p : Parser<_,_>) (endp : Parser<_,_>) : Parser<unit,unit> =
    fun stream ->
        let tryParse (p : Parser<_,_>) (stm : CharStream<unit>) : bool = 
            let spre = stm.State
            let reply = p stream
            match reply.Status with
            | ReplyStatus.Ok -> 
                stream.BacktrackTo spre
                true
            | _ -> 
                stream.BacktrackTo spre
                false
        let initialState = stream.State
        let mutable preply = preturn () stream
        let mutable looping = true
        while (not (tryParse endp stream)) && looping do
            preply <- p stream
            match preply.Status with
            | ReplyStatus.Ok -> ()
            | _ -> looping <- false
        match preply.Status with
            | ReplyStatus.Ok -> preply
            | _ ->
                let myReply = Reply(Error, mergeErrors preply.Error (messageError "skipManyTillEx failed") )
                stream.BacktrackTo initialState
                myReply



let ublockComment, ublockCommentImpl = createParserForwardedToRef()
let bcopenTag = "/*"
let bccloseTag = "*/"
let pbcopen = pstring bcopenTag
let pbcclose = pstring bccloseTag
let ignoreCommentContent : Parser<unit,unit> = skipManyTillEx (skipAnyChar)  (choice [pbcopen; pbcclose] |>> fun x -> ())
let ignoreSubComment : Parser<unit,unit> = ublockComment
let commentContent : Parser<unit,unit> = skipMany (choice [ignoreCommentContent; ignoreSubComment])
do ublockCommentImpl := between (pbcopen) (pbcclose) (commentContent) |>> fun c -> ()

do test (skipManyTillEx (pchar 'a' |>> fun c -> ()) (pchar 'b') >>. (restOfLine true)) "aaaabcccc"

// do test ublockComment "/**/"
//do test ublockComment "/* This is a comment \n With multiple lines. */"
do test ublockComment "/* Bla bla bla /* nested bla bla */ more outer bla bla */"

Solution

  • Finally found a way to fix the many problem. Replaced my custom skipManyTillEx with another custom function I called skipManyTill1Ex. skipManyTill1Ex, in contrast to the previous skipManyTillEx only succeeds if it parsed 1 or more p successfully.

    I expected the test for the empty comment /**/ to fail for this version but it works.

    ...
    let skipManyTill1Ex (p : Parser<_,_>) (endp : Parser<_,_>) : Parser<unit,unit> =
        fun stream ->
            let tryParse (p : Parser<_,_>) (stm : CharStream<unit>) : bool = 
                let spre = stm.State
                let reply = p stm
                match reply.Status with
                | ReplyStatus.Ok -> 
                    stream.BacktrackTo spre
                    true
                | _ -> 
                    stream.BacktrackTo spre
                    false
            let initialState = stream.State
            let mutable preply = preturn () stream
            let mutable looping = true
            let mutable matchCounter = 0
            while (not (tryParse endp stream)) && looping do
                preply <- p stream
                match preply.Status with
                | ReplyStatus.Ok -> 
                    matchCounter <- matchCounter + 1
                    ()
                | _ -> looping <- false
            match (preply.Status, matchCounter) with
                | (ReplyStatus.Ok, c) when (c > 0) -> preply
                | (_,_) ->
                    let myReply = Reply(Error, mergeErrors preply.Error (messageError "skipManyTill1Ex failed") )
                    stream.BacktrackTo initialState
                    myReply
    
    
    let ublockComment, ublockCommentImpl = createParserForwardedToRef()
    let bcopenTag = "/*"
    let bccloseTag = "*/"
    let pbcopen = pstring bcopenTag
    let pbcclose = pstring bccloseTag
    let ignoreCommentContent : Parser<unit,unit> = skipManyTill1Ex (skipAnyChar)  (choice [pbcopen; pbcclose] |>> fun x -> ())
    let ignoreSubComment : Parser<unit,unit> = ublockComment
    let commentContent : Parser<unit,unit> = skipMany (choice [ignoreCommentContent; ignoreSubComment])
    do ublockCommentImpl := between (pbcopen) (pbcclose) (commentContent) |>> fun c -> ()
    
    do test (skipManyTillEx (pchar 'a' |>> fun c -> ()) (pchar 'b') >>. (restOfLine true)) "aaaabcccc"
    
    do test ublockComment "/**/"
    do test ublockComment "/* This is a comment \n With multiple lines. */"
    do test ublockComment "/* Bla bla bla /* nested bla bla */ more outer bla bla */"