Search code examples
xmlerlang

How to convert XML to list of tuples in Erlang?


I'm trying to create key, value pair tuples out of XML. I'd like to make a list out of any nested XML. It seems like a very common thing to do, but I can't find any examples.

For instance:

    <something>
        <Item>
            <name>The Name!</name>
            <reviews>
                    <review>
                        <review-by>WE</review-by>
                        <review-points>92</review-points>
                    </review>
        
                    <review>
                        <review-by>WS</review-by>
                        <review-points>90</review-points>
                    </review>
            </reviews>
        </Item>
    </something>

Should turn out like:

    [[{"name", "The Name!"}, {"reviews", [[{"review-by", "WE"}, {"review-points", 92}], [{"review-by", "WS"}, {"review-points", 90}]]}]]

Where each Item is the main wrapper node.

I've admittedly cargo culted and tweaked the code below. It only returns a list of the first Item's elements. And I'm not sure how to begin the nested ones.

    -module(reader).
    -compile(export_all).
    -include_lib("xmerl/include/xmerl.hrl").

    %% Scan the XML file named FileName with xmerl_scan:file/1 and fold the
    %% first element of its result through extract/2, starting from an empty
    %% accumulator. The second element of the scanner's result is ignored.
    parse(FileName) ->
        {Root, _Ignored} = xmerl_scan:file(FileName),
        extract(Root, []).
    
    %% Fold step over xmerl nodes; Acc is the list collected so far.
    %% Results are prepended, so they come out in reverse visiting order.
    %%
    %% An 'Item' element contributes a single {item, Pairs} tuple, where Pairs
    %% is gathered from its content starting from an empty list. Any other
    %% element is transparent: its content is folded straight into Acc.
    extract(#xmlElement{name = 'Item', content = Children}, Acc) ->
        Pairs = lists:foldl(fun extract/2, [], Children),
        [{item, Pairs} | Acc];
    extract(#xmlElement{content = Children}, Acc) ->
        lists:foldl(fun extract/2, Acc, Children);

    %% A text node is kept only when its parents list has exactly three entries
    %% and the middle entry is exactly {'Item', 2}; the name in the first entry
    %% becomes the key. Text with any other parents list is skipped by the
    %% final clause.
    extract(#xmlText{parents = [{Key, _}, {'Item', 2}, _], value = Text, type = text}, Acc) ->
        [{Key, Text} | Acc];

    %% Anything else leaves the accumulator untouched.
    extract(_Other, Acc) ->
        Acc.

Solution

  • Give Erlsom a try. The function "erlsom:simple_form(XML)" returns nested {Node,Attrib,Value} tuples:

      {"something",[],
     [{"Item",[],
       [{"name",[],["The Name!"]},
        {"reviews",[],
         [{"review",[],
           [{"review-by",[],["WE"]},{"review-points",[],["92"]}]},
          {"review",[],
           [{"review-by",[],["WS"]},{"review-points",[],["90"]}]}]}]}]}
    

    However, if you want to drop the Attributes then you can write a fun to pass to the erlsom SAX parser or reformat the output of simple_form. Something like this (to reformat output):

    -module(reader).
    -compile(export_all).
    
    %% Parse XML with erlsom:simple_form/1 and drop the attributes.
    %% When the parse result is {ok, {Root, Attrs, Children}, _}, returns
    %% {Root, KeyValues}, with KeyValues built by xml_to_kv/1 from Children.
    %% Any other parse result is returned unchanged.
    convert(XML) ->
        Parsed = erlsom:simple_form(XML),
        case Parsed of
            {ok, {RootName, _RootAttrs, Children}, _Trailing} ->
                {RootName, xml_to_kv(Children)};
            _ ->
                Parsed
        end.
    
    
    %% Turn a list of {Name, Attrs, Children} triples into {Name, Value} pairs,
    %% discarding Attrs. Children are converted recursively and then flattened,
    %% so a single text child such as ["92"] ends up as the string "92".
    %% Conversion stops at the first term that is not a list headed by a
    %% 3-tuple; that term (text, [], or anything else) is returned as is.
    xml_to_kv([Element | Siblings]) when tuple_size(Element) =:= 3 ->
        {Name, _Attrs, Children} = Element,
        Pair = {Name, lists:flatten(xml_to_kv(Children))},
        [Pair | xml_to_kv(Siblings)];

    xml_to_kv(Other) ->
        Other.
    

    This would produce the following output:

     {"something",
     [{"Item",
       [{"name","The Name!"},
        {"reviews",
         [{"review",[{"review-by","WE"},{"review-points","92"}]},
          {"review",[{"review-by","WS"},{"review-points","90"}]}]}]}]}
    

    Note: I haven't tested this on very large XML files. You should run a few tests and consider memory usage.