Search code examples
xmlpostgresqljsonb

How to convert a XML into JSONB within Postgres


I have a table, Table1 that contains a XML data type column, ColumnA and another JSONB column ColumnB

How do I convert the data from ColumnA into ColumnB, presumably using an SQL UPDATE statement. Is there a built-in function in Postgres that does this?


Solution

  • There is no out-of-box functions as I know.

    Try:

    create or replace function xml_to_json(p_xml xml) returns jsonb as $$
    declare
      result json;
      root text;
      childs jsonb;
      attr jsonb;
      txt text;
    begin
      -- Get root element name
      select (xpath('name(/*)', p_xml))[1]::text into root;
    
      -- Process child nodes
      select json_agg(xml_to_json(z))
      from unnest(xpath('/'||root||'/*', p_xml)) with ordinality as c(z,i)
      into childs;
    
      -- Read attributes
      select jsonb_agg(jsonb_build_object((xpath('name(/'||root||'/@*['||i||'])', p_xml))[1]::text, v))
      from unnest(xpath('/'||root||'/@*', p_xml)::text[]) with ordinality as a(v,i) 
      into attr;
    
      -- Read text
      select (xpath('/'||root||'/text()', p_xml))[1]::text into txt;
    
      -- Build object
      result := jsonb_build_object(root, jsonb_build_object('attr', attr, 'text', txt, 'childs', childs));
      return result;
    end $$ language plpgsql immutable;
    

    Test:

    with t(x) as (values
    ('<root bar="foo" name="test" foo="bar">
      <aaa bbb="111">
        foo
        bar
      </aaa>
      <bbb>
        <ccc>222</ccc>
      </bbb>
    </root>'::xml),
    ('<?xml version="1.0" encoding="UTF-8"?>
    <bookstore>
      <book>
        <title lang="en">Harry Potter</title>
        <price>29.99</price>
      </book>
      <book>
        <title lang="en">Learning XML</title>
        <price>39.95</price>
      </book>
    </bookstore>'))
    select jsonb_pretty(xml_to_json(x)) from t;
    

    And output:

                          jsonb_pretty                       
    ---------------------------------------------------------
     {                                                      +
         "root": {                                          +
             "attr": [                                      +
                 {                                          +
                     "bar": "foo"                           +
                 },                                         +
                 {                                          +
                     "name": "test"                         +
                 },                                         +
                 {                                          +
                     "foo": "bar"                           +
                 }                                          +
             ],                                             +
             "text": "\n  ",                                +
             "childs": [                                    +
                 {                                          +
                     "aaa": {                               +
                         "attr": [                          +
                             {                              +
                                 "bbb": "111"               +
                             }                              +
                         ],                                 +
                         "text": "\n    foo\n    bar\n  ",  +
                         "childs": null                     +
                     }                                      +
                 },                                         +
                 {                                          +
                     "bbb": {                               +
                         "attr": null,                      +
                         "text": "\n    ",                  +
                         "childs": [                        +
                             {                              +
                                 "ccc": {                   +
                                     "attr": null,          +
                                     "text": "222",         +
                                     "childs": null         +
                                 }                          +
                             }                              +
                         ]                                  +
                     }                                      +
                 }                                          +
             ]                                              +
         }                                                  +
     }
     {                                                      +
         "bookstore": {                                     +
             "attr": null,                                  +
             "text": "\n  ",                                +
             "childs": [                                    +
                 {                                          +
                     "book": {                              +
                         "attr": null,                      +
                         "text": "\n    ",                  +
                         "childs": [                        +
                             {                              +
                                 "title": {                 +
                                     "attr": [              +
                                         {                  +
                                             "lang": "en"   +
                                         }                  +
                                     ],                     +
                                     "text": "Harry Potter",+
                                     "childs": null         +
                                 }                          +
                             },                             +
                             {                              +
                                 "price": {                 +
                                     "attr": null,          +
                                     "text": "29.99",       +
                                     "childs": null         +
                                 }                          +
                             }                              +
                         ]                                  +
                     }                                      +
                 },                                         +
                 {                                          +
                     "book": {                              +
                         "attr": null,                      +
                         "text": "\n    ",                  +
                         "childs": [                        +
                             {                              +
                                 "title": {                 +
                                     "attr": [              +
                                         {                  +
                                             "lang": "en"   +
                                         }                  +
                                     ],                     +
                                     "text": "Learning XML",+
                                     "childs": null         +
                                 }                          +
                             },                             +
                             {                              +
                                 "price": {                 +
                                     "attr": null,          +
                                     "text": "39.95",       +
                                     "childs": null         +
                                 }                          +
                             }                              +
                         ]                                  +
                     }                                      +
                 }                                          +
             ]                                              +
         }                                                  +
     }
    (2 rows)