Search code examples
f#indentationfparsec

Create a sepBy parser combinator sensitive to the indentation of the first parser


Via FParsec and using this lib (whose code, which is quite short, has been copied to the end of the question), I'm trying to design a sepBy-like parser that is sensitive to the indentation of the first parser passed in argument. Typically, if I give this: indentedSepBy (pstring "Example") (pchar '.'), I would like this type of program to be acceptable:

Example
  .Example
  .Example.Example
  .Example.Example.Example
  .Example

But not this one:

Example
.Example
.Example.Example
.Example

So that the very first position (and so the very first parser) setting the indentation of the rest. To do this, I just tried a simple rewrite using FParsec's default sepBy parser by applying it to the indentation lib, I get this:

open FParsec
open IndentParsec

let indentSepBy p sep =
    parse {
        let! pos = getPosition
        return! sepBy (greater pos p) (greater pos sep)
    }

let test = indentSepBy (pstring "Example") (pchar '.')
let text = "Example.Example" (*Simple for start*)

Applying this, I get the following error message (from FParsec):

"Error in Ln: 1 Col: 16
Example.Example
               ^
Note: The error occurred at the end of the input stream.
Expecting: '.'
"

If I remove the indentation-related parsers,

let indentSepBy p sep = (*so it's just trivially equivalent to the sepBy parser*)
    parse {
        let! pos = getPosition
        return! sepBy p sep
    }

The problem no longer arises and the result is what we expected. Therefore, I do not understand which parameter causes this error. It seems quite likely that it is a problem in the indentation lib, but I can't figure it out... Here is the lib in question, which I have shortened to the essentials:

open FParsec

module IndentParser =
    type Indentation =
        | Fail
        | Any
        | Greater of Position
        | Exact of Position
        | AtLeast of Position
        | StartIndent of Position
        member this.Position =
            match this with
            | Any
            | Fail -> None
            | Greater p -> Some p
            | Exact p -> Some p
            | AtLeast p -> Some p
            | StartIndent p -> Some p

    type IndentState<'T> = { Indent: Indentation; UserState: 'T }
    type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>

    let indentState u = { Indent = Any; UserState = u }
    let runParser p u s = runParserOnString p (indentState u) "" s

    let runParserOnFile p u path =
        runParserOnFile p (indentState u) path System.Text.Encoding.UTF8

    let getIndentation: IndentParser<_, _> =
        fun stream ->
            match stream.UserState with
            | { Indent = i } -> Reply i

    let putIndentation newi: IndentParser<unit, _> =
        fun stream ->
            stream.UserState <- { stream.UserState with Indent = newi }
            Reply(Unchecked.defaultof<unit>)

    let failf fmt = fail << sprintf fmt

    let acceptable i (pos: Position) =
        match i with
        | Any _ -> true
        | Fail -> false
        | Greater bp -> bp.Column < pos.Column
        | Exact ep -> ep.Column = pos.Column
        | AtLeast ap -> ap.Column <= pos.Column
        | StartIndent _ -> true

    let nestableIn i o =
        match i, o with
        | Greater i, Greater o -> o.Column < i.Column
        | Greater i, Exact o -> o.Column < i.Column
        | Exact i, Exact o -> o.Column = i.Column
        | Exact i, Greater o -> o.Column <= i.Column
        | _, _ -> true

    let tokeniser p =
        parse {
            let! pos = getPosition
            let! i = getIndentation

            if acceptable i pos
            then return! p
            else return! failf "incorrect indentation at %A" pos
        }

    let nestP i o p =
        parse {
            do! putIndentation i
            let! x = p

            do! notFollowedBy (tokeniser anyChar)
                <?> (sprintf "unterminated %A" i)

            do! putIndentation o

            return x
        }

    let indented<'a, 'u> i (p: Parser<'a, _>): IndentParser<_, 'u> =
        parse {
            do! putIndentation i
            do! spaces
            return! tokeniser p
        }

    let exact<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
    let greater<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
    let atLeast<'a, 'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
    let any<'a, 'u> pos p: IndentParser<'a, 'u> = indented Any p

Solution

  • I think there are two problems here:

    1. You're requiring the first "Example" to be indented beyond itself, which is impossible. You should instead let the first parser succeed regardless of the current position.
    2. greater is not atomic, so when it fails, your parser is left in an invalid state. This might or might not be considered a bug in the library. In any case, you can make it atomic via attempt.

    With that in mind, I think the following parser does roughly what you want:

    let indentSepBy p sep =
        parse {
            let! pos = getPosition
            let! head = p
            let! tail =
                let p' = attempt (greater pos p)
                let sep' = attempt (greater pos sep)
                many (sep' >>. p')
            return head :: tail
        }
    

    You can test this as follows:

    let test =
        indentSepBy (pstring "Example") (pchar '.')
    
    let run text =
        printfn "***"
        runParser (test .>> eof) () text
            |> printfn "%A"
    
    [<EntryPoint>]
    let main argv =
        run "Example.Example"      // success
        run "Example\n.Example"    // failure
        run "Example\n .Example"   // success
        0
    

    Note that I've forced the test parser to consume the entire input via eof. Otherwise, it will falsely report success when it can't in fact parse the full string.