I'm trying to parse RFC 2822 messages using FParsec, but I'm unable to deal with headers that span multiple lines (the parser gets confused by the next header). Here's my best try; do you have any tips?
/// Shorthand for `pstring`: builds a parser for the literal text `s`.
let str (s: string) = s |> pstring
/// Reads a (possibly empty) run of characters, stopping before the first
/// ':', '\r' or '\n'.
let stringLiteral =
    let isDelimiter ch = ch = ':' || ch = '\r' || ch = '\n'
    manySatisfy (fun ch -> not (isDelimiter ch))
/// Zero or more space characters (only ' ', not tabs).
let ws = pchar ' ' |> many
/// Single-line header: `key`, optional spaces, ':', optional spaces, `value`,
/// then a '\n'. Yields the (key, value) pair; the newline is discarded.
let keyValueSimple =
    let key = stringLiteral
    let value = ws >>. str ":" >>. ws >>. stringLiteral
    key .>>. value .>> pchar '\n'
/// One non-final piece of a multi-line value: optional spaces, the text of the
/// line, then a '\n' that must be immediately followed by a '\t'.
/// Yields only the text.
let lineValue =
    let continuationMark = pchar '\n' >>. pchar '\t'
    ws >>. stringLiteral .>> continuationMark
/// Final piece of a value: optional spaces, the text of the line, then a '\n'
/// that is NOT followed by a '\t'. Yields only the text.
let lastValue =
    let endOfValue = pchar '\n' .>> notFollowedBy (pchar '\t')
    ws >>. stringLiteral .>> endOfValue
/// First attempt at a multi-line header: key, ':', any number of `lineValue`
/// pieces, then a `lastValue`.
/// NOTE: `lineValue` is not wrapped in a backtracking combinator here, and the
/// mapping below returns the leading (key, ':') result twice while dropping
/// the parsed value pieces.
let keyValueComplex =
    let keyAndColon = stringLiteral .>>. (ws >>. pchar ':')
    keyAndColon .>>. many lineValue .>>. lastValue
    |>> (fun ((head, _), _) -> (head, head))
/// One or more headers, each parsed with `keyValueComplex`.
let headers = keyValueComplex |> many1
/// Runs the `headers` parser over a fixed sample message via `test`.
/// `fileName` is accepted but not used by this body.
let parse (fileName:string) =
    let sample = "Return-Path: <[email protected]>\n\twerwe\nDelivered-To: [email protected]\n "
    test headers sample
I get the error "expecting \t" at line 3, column 1: Delivered-To: [email protected]
Never mind: it looks like I needed to backtrack (using attempt) so that the parser does not always expect a \t, but instead goes on to look for the next header.
/// Multi-line header: key, ':', any number of continuation pieces, then the
/// final piece. Each `lineValue` is wrapped in `attempt` so that a piece whose
/// '\n' is not followed by '\t' is backtracked and handed to `lastValue`.
/// Yields (key, value) where value is all pieces concatenated in order with
/// nothing inserted between them.
let keyValueComplex =
    // The ':' result is not needed, so keep only the key.
    let key = stringLiteral .>> (ws >>. pchar ':')
    key .>>. many (attempt lineValue) .>>. lastValue
    |>> (fun ((name, pieces), tail) -> (name, String.concat "" pieces + tail))
This now yields:
Success: [("Return-Path", "<[email protected]>werwe"); ("Delivered-To", "[email protected]")]