Via FParsec and using this lib (whose code, which is quite short, has been copied to the end of the question), I'm trying to design a sepBy
-like parser that is sensitive to the indentation of the first parser passed in argument. Typically, if I give this: indentedSepBy (pstring "Example") (pchar '.')
, I would like this type of program to be acceptable:
Example
.Example
.Example.Example
.Example.Example.Example
.Example
But not this one:
Example
.Example
.Example.Example
.Example
So that the very first position (and so the very first parser) setting the indentation of the rest. To do this, I just tried a simple rewrite using FParsec's default sepBy parser by applying it to the indentation lib, I get this:
open FParsec
open IndentParsec
let indentSepBy p sep =
parse {
let! pos = getPosition
return! sepBy (greater pos p) (greater pos sep)
}
let test = indentSepBy (pstring "Example") (pchar '.')
let text = "Example.Example" (*Simple for start*)
Applying this, I get the following error message (from FParsec):
"Error in Ln: 1 Col: 16 Example.Example ^ Note: The error occurred at the end of the input stream. Expecting: '.' "
If I remove the indentation-related parsers,
let indentSepBy p sep = (*so it's just trivially equivalent to the sepBy parser*)
parse {
let! pos = getPosition
return! sepBy p sep
}
The problem no longer arises and the result is what we expected. Therefore, I do not understand which parameter causes this error. It seems quite likely that it is a problem in the indentation lib, but I can't figure it out... Here is the lib in question, which I have shortened to the essentials:
open FParsec
module IndentParser =
type Indentation =
| Fail
| Any
| Greater of Position
| Exact of Position
| AtLeast of Position
| StartIndent of Position
member this.Position =
match this with
| Any
| Fail -> None
| Greater p -> Some p
| Exact p -> Some p
| AtLeast p -> Some p
| StartIndent p -> Some p
type IndentState<'T> = { Indent: Indentation; UserState: 'T }
type IndentParser<'T, 'UserState> = Parser<'T, IndentState<'UserState>>
let indentState u = { Indent = Any; UserState = u }
let runParser p u s = runParserOnString p (indentState u) "" s
let runParserOnFile p u path =
runParserOnFile p (indentState u) path System.Text.Encoding.UTF8
let getIndentation: IndentParser<_, _> =
fun stream ->
match stream.UserState with
| { Indent = i } -> Reply i
let putIndentation newi: IndentParser<unit, _> =
fun stream ->
stream.UserState <- { stream.UserState with Indent = newi }
Reply(Unchecked.defaultof<unit>)
let failf fmt = fail << sprintf fmt
let acceptable i (pos: Position) =
match i with
| Any _ -> true
| Fail -> false
| Greater bp -> bp.Column < pos.Column
| Exact ep -> ep.Column = pos.Column
| AtLeast ap -> ap.Column <= pos.Column
| StartIndent _ -> true
let nestableIn i o =
match i, o with
| Greater i, Greater o -> o.Column < i.Column
| Greater i, Exact o -> o.Column < i.Column
| Exact i, Exact o -> o.Column = i.Column
| Exact i, Greater o -> o.Column <= i.Column
| _, _ -> true
let tokeniser p =
parse {
let! pos = getPosition
let! i = getIndentation
if acceptable i pos
then return! p
else return! failf "incorrect indentation at %A" pos
}
let nestP i o p =
parse {
do! putIndentation i
let! x = p
do! notFollowedBy (tokeniser anyChar)
<?> (sprintf "unterminated %A" i)
do! putIndentation o
return x
}
let indented<'a, 'u> i (p: Parser<'a, _>): IndentParser<_, 'u> =
parse {
do! putIndentation i
do! spaces
return! tokeniser p
}
let exact<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Exact pos) p
let greater<'a, 'u> pos p: IndentParser<'a, 'u> = indented (Greater pos) p
let atLeast<'a, 'u> pos p: IndentParser<'a, 'u> = indented (AtLeast pos) p
let any<'a, 'u> pos p: IndentParser<'a, 'u> = indented Any p
I think there are two problems here:
"Example"
to be indented beyond itself, which is impossible. You should instead let the first parser succeed regardless of the current position.greater
is not atomic, so when it fails, your parser is left in an invalid state. This might or might not be considered a bug in the library. In any case, you can make it atomic via attempt
.With that in mind, I think the following parser does roughly what you want:
let indentSepBy p sep =
parse {
let! pos = getPosition
let! head = p
let! tail =
let p' = attempt (greater pos p)
let sep' = attempt (greater pos sep)
many (sep' >>. p')
return head :: tail
}
You can test this as follows:
let test =
indentSepBy (pstring "Example") (pchar '.')
let run text =
printfn "***"
runParser (test .>> eof) () text
|> printfn "%A"
[<EntryPoint>]
let main argv =
run "Example.Example" // success
run "Example\n.Example" // failure
run "Example\n .Example" // success
0
Note that I've forced the test
parser to consume the entire input via eof
. Otherwise, it will falsely report success when it can't in fact parse the full string.