Search code examples
haskellcommand-linexml-parsinghxt

How to correctly collect command line options on Hxt program?


I've reach to the section 3 of the thesis.A Cookbook for the Haskell XML Toolbox with Examples for Processing RDF Documents of M. Ohlendorf.

Here is the program I've write,

import Text.XML.HXT.Core
import System.Exit
import System.Environment
import Data.Maybe

main = do
  args       <- getArgs
  (al, src)  <- cmdLineOpts args
  [rc]       <- runX (processDocument al src)
  exitWith ( if rc >= c_err
             then ExitFailure (-1)
             else ExitSuccess
           )

cmdLineOpts :: [String] -> IO (Attributes, String)
cmdLineOpts []  = return ([("","")], "")
cmdLineOpts xss = return (zip [""] xss :: Attributes, last xss)

processDocument :: Attributes -> String -> IOSArrow b Int
processDocument al src =
    readDocument al src -- lecture du document en appliquant les attributes                                                                                            
    >>>
    removeAllWhiteSpace >>> propagateNamespaces
    >>>
    writeDocument al (fromMaybe "" (lookup a_output_file al))
    >>>
    getErrStatus

But I still go through the following error

hxtuto.hs:28:17:
    Couldn't match expected type `XIOSysState -> XIOSysState'
           against inferred type `(String, String)'
      Expected type: SysConfigList
      Inferred type: Attributes
    In the first argument of `readDocument', namely `al'
    In the first argument of `(>>>)', namely `readDocument al src'
Failed, modules loaded: none.

It seems that it is my implementation of cmdLineOpts which doesn't fit well.

What is the problem here ? and How can I fix it ?

Thanks for any help !


Solution

  • Since the first parameter to both readDocument and writeDocument is [SysConfig], you might want to use a package like GetOpt to handle the housekeeping of reading text from the command line and transforming it into the required objects. I took the list of "available options" from page 50 of the thesis and created an Options type with the current, corresponding SysConfigs (from Text.XML.HXT.Arrow.XmlState.SystemConfig). Except for the parts that have been customized for the specific application at hand, the rest (e.g. cmdLineOpts) was taken directly from the GetOpt documentation.

    import System.Console.GetOpt
    import System.Environment
    import System.Exit 
    import Text.XML.HXT.Core
    
    data Options = Options {
        withvalidate :: SysConfig
      , withchecknamespaces :: SysConfig
      , withcanonicalize :: SysConfig
      , withremovews :: SysConfig
      , withtrace :: SysConfig
      , output_file :: String } 
    
    defaultOptions = Options { withvalidate = (withValidate no)
                             , withchecknamespaces = (withCheckNamespaces no)
                             , withcanonicalize = (withCanonicalize no)
                             , withremovews = (withRemoveWS no)
                             , withtrace = (withTrace 0)
                             , output_file = "" } 
    
    options :: [OptDescr (Options -> Options)]
    options =
     [ Option ['V'] ["withValidate"] 
       (ReqArg (\v opts -> opts { withvalidate = withValidate (v == "yes") } ) "") 
       "perform DTD validation"
     , Option ['n'] ["withCheckNamespaces"] 
       (ReqArg (\n opts -> opts { withchecknamespaces = withCheckNamespaces (n == "yes") } ) "")
       "check namespaces"
     , Option ['c'] ["withCanonicalize"] 
       (ReqArg (\c opts -> opts { withcanonicalize = withCanonicalize (c == "yes") } ) "")
       "canonicalize document"
     , Option ['w'] ["withRemoveWS"] 
       (ReqArg (\w opts -> opts { withremovews = withRemoveWS (w == "yes") } ) "")
       "remove whitespace used for document indentation"
     , Option ['t'] ["withTrace"] 
       (ReqArg (\t opts -> opts { withtrace = withTrace (read t) } ) "")
       "set trace level" 
     , Option ['o'] ["outputFile"] 
       (ReqArg (\o opts -> opts { output_file = o } ) "") 
       "output file" ]
    
    cmdLineOpts :: [String] -> IO (Options, [String])
    cmdLineOpts argv =
        case getOpt Permute options argv of
          (o, n, []) -> return (foldl (flip id) defaultOptions o, n)
          (_, _, errs) -> ioError (userError (concat errs ++ usageInfo header options))
        where header = "Using: [OPTION ...]"
    
    main :: IO ()
    main = do (opts, (src:_)) <- cmdLineOpts =<< getArgs 
              [rc] <- runX $ processDocument opts src
              exitWith $ if rc >= c_err then ExitFailure (-1) else ExitSuccess
    
    processDocument :: Options -> String -> IOSArrow b Int
    processDocument (Options val ns can ws tr out) src =
        readDocument [val, ns, can, ws, tr] src >>> 
        removeAllWhiteSpace >>> propagateNamespaces >>>
        writeDocument [val, ns, can, ws, tr] out >>>
        getErrStatus